mirror of
https://github.com/tdurieux/anonymous_github.git
synced 2026-02-13 02:42:45 +00:00
feat: introduce streamers that handle the stream and anonymization from github
This commit is contained in:
348
src/core/AnonymizedFile.ts
Normal file
348
src/core/AnonymizedFile.ts
Normal file
@@ -0,0 +1,348 @@
|
||||
import { join, basename } from "path";
|
||||
import { Response } from "express";
|
||||
import { Readable } from "stream";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import { lookup } from "mime-types";
|
||||
|
||||
import Repository from "./Repository";
|
||||
import { RepositoryStatus, Tree, TreeElement, TreeFile } from "./types";
|
||||
import config from "../config";
|
||||
import { anonymizePath, isTextFile } from "./anonymize-utils";
|
||||
import AnonymousError from "./AnonymousError";
|
||||
import { handleError } from "../server/routes/route-utils";
|
||||
import got from "got";
|
||||
|
||||
/**
|
||||
 * Represents a file in an anonymized repository
|
||||
*/
|
||||
export default class AnonymizedFile {
|
||||
private _originalPath: string | undefined;
|
||||
private fileSize?: number;
|
||||
|
||||
repository: Repository;
|
||||
anonymizedPath: string;
|
||||
_sha?: string;
|
||||
|
||||
/**
 * Create the handle of a file that belongs to an anonymized repository.
 *
 * @param data.repository the repository that contains the file
 * @param data.anonymizedPath the anonymized path of the file
 * @throws AnonymousError `terms_not_specified` (HTTP 400) when the repository
 * options do not define anonymization terms
 */
constructor(data: { repository: Repository; anonymizedPath: string }) {
  this.repository = data.repository;
  // Assign the path before validating: the error below receives `this` as
  // its object, and the path is part of the detail printed for a file.
  this.anonymizedPath = data.anonymizedPath;
  if (!this.repository.options.terms) {
    throw new AnonymousError("terms_not_specified", {
      object: this,
      httpStatus: 400,
    });
  }
}
|
||||
|
||||
/**
 * Get the sha of the file, without the double quotes it may contain.
 * The original path is resolved first when the sha is not known yet.
 *
 * @returns the sha, or undefined when it is still unknown after the lookup
 */
async sha() {
  const tracer = trace.getTracer("ano-file");
  return tracer.startActiveSpan("sha", async (span) => {
    try {
      span.setAttribute("anonymizedPath", this.anonymizedPath);
      if (!this._sha) {
        // originalPath() fills _sha as a side effect
        await this.originalPath();
      }
      return this._sha?.replace(/"/g, "");
    } finally {
      span.end();
    }
  });
}
|
||||
|
||||
/**
 * De-anonymize the path.
 *
 * The anonymized path is resolved segment by segment against the file tree
 * of the repository. A segment that is not present as-is is matched against
 * the anonymized name of every entry of the current folder. When several
 * entries anonymize to the same name, the next segment of the path is used
 * to pick the right one, and the first candidate is used as a fallback.
 *
 * As a side effect, the size, the sha and the resolved path of the file are
 * cached on the instance.
 *
 * @returns the original relative path of the file
 * @throws AnonymousError `path_not_specified` (HTTP 400) when the anonymized path is empty
 * @throws AnonymousError `file_not_found` (HTTP 404) when a segment matches no entry
 * @throws AnonymousError `folder_not_supported` when the path does not resolve to a file
 */
async originalPath(): Promise<string> {
  return trace
    .getTracer("ano-file")
    .startActiveSpan("originalPath", async (span) => {
      try {
        span.setAttribute("anonymizedPath", this.anonymizedPath);
        if (this._originalPath) return this._originalPath;
        if (!this.anonymizedPath) {
          throw new AnonymousError("path_not_specified", {
            object: this,
            httpStatus: 400,
          });
        }

        const terms = this.repository.options.terms;

        // true when `candidate` is a folder with an entry that is named
        // `name`, either directly or once anonymized
        const hasChild = (
          candidate: TreeElement | undefined,
          name: string
        ): boolean => {
          if (!candidate || typeof candidate !== "object") return false;
          const children = candidate as Tree;
          if (children[name]) return true;
          return Object.keys(children).some(
            (child) => anonymizePath(child, terms) == name
          );
        };

        let currentOriginal = (await this.repository.files({
          force: false,
        })) as TreeElement;

        const paths = this.anonymizedPath.trim().split("/");
        let currentOriginalPath = "";
        for (let i = 0; i < paths.length; i++) {
          const fileName = paths[i];
          if (fileName == "") {
            // leading, trailing or doubled slash
            continue;
          }
          const folder = currentOriginal as Tree;
          if (folder[fileName]) {
            // the segment was not changed by the anonymization
            currentOriginalPath = join(currentOriginalPath, fileName);
            currentOriginal = folder[fileName];
            continue;
          }

          // anonymize all the entries of the folder and keep the ones that
          // match the current segment
          const options: string[] = [];
          for (const originalFileName in currentOriginal) {
            if (anonymizePath(originalFileName, terms) == fileName) {
              options.push(originalFileName);
            }
          }
          if (options.length == 0) {
            throw new AnonymousError("file_not_found", {
              object: this,
              httpStatus: 404,
            });
          }

          // default: the only candidate, or the first one when the
          // ambiguity cannot be resolved
          let selected = options[0];
          const nextName = paths[i + 1];
          if (options.length > 1 && nextName) {
            // use the next segment to find the folder that contains it
            const match = options.find((option) =>
              hasChild(folder[option], nextName)
            );
            if (match !== undefined) selected = match;
          }
          currentOriginalPath = join(currentOriginalPath, selected);
          currentOriginal = folder[selected];
        }

        if (
          currentOriginal.sha === undefined ||
          currentOriginal.size === undefined
        ) {
          // only files have both a sha and a size
          throw new AnonymousError("folder_not_supported", { object: this });
        }

        const file = currentOriginal as TreeFile;
        this.fileSize = file.size;
        this._sha = file.sha;

        this._originalPath = currentOriginalPath;
        return this._originalPath;
      } finally {
        span.end();
      }
    });
}
|
||||
/**
 * Get the lower-cased text that follows the last dot of the file name.
 * The whole file name is returned when it contains no dot.
 */
extension() {
  const segments = basename(this.anonymizedPath).split(".");
  const last = segments[segments.length - 1];
  return last.toLowerCase();
}
|
||||
/**
 * Check whether the extension of the file is one of the known image
 * extensions.
 */
isImage() {
  const imageExtensions = new Set([
    "png",
    "jpg",
    "jpeg",
    "gif",
    "svg",
    "ico",
    "bmp",
    "tiff",
    "tif",
    "webp",
    "avif",
    "heif",
    "heic",
  ]);
  return imageExtensions.has(this.extension());
}
|
||||
|
||||
/**
 * Check whether the repository options allow this file to be served:
 * PDF files require the `pdf` option and images require the `image` option.
 */
isFileSupported() {
  const options = this.repository.options;
  const pdfBlocked = !options.pdf && this.extension() == "pdf";
  if (pdfBlocked) return false;
  const imageBlocked = !options.image && this.isImage();
  return !imageBlocked;
}
|
||||
|
||||
/**
 * Get the raw (not anonymized) content of the file from the repository
 * source.
 *
 * @returns a stream of the file content
 * @throws AnonymousError `file_too_big` (HTTP 403) when the known file size
 * exceeds `config.MAX_FILE_SIZE`
 */
async content(): Promise<Readable> {
  const tracer = trace.getTracer("ano-file");
  return tracer.startActiveSpan("content", async (span) => {
    try {
      // a masked path has to be resolved before `filePath` can be read
      const isMasked = this.anonymizedPath.includes(config.ANONYMIZATION_MASK);
      if (isMasked) {
        await this.originalPath();
      }
      span.addEvent("filePath", { originalPath: this.filePath });

      const tooBig = this.fileSize && this.fileSize > config.MAX_FILE_SIZE;
      if (tooBig) {
        throw new AnonymousError("file_too_big", {
          object: this,
          httpStatus: 403,
        });
      }

      const stream = await this.repository.source?.getFileContent(this);

      // NOTE(review): with `!isReseted` the status is written each time the
      // flag is false; confirm that this is the intended condition.
      const model = this.repository.model;
      const mustMarkReady =
        !model.isReseted || this.repository.status != RepositoryStatus.READY;
      if (mustMarkReady) {
        model.isReseted = false;
        await this.repository.updateStatus(RepositoryStatus.READY);
      }
      return stream;
    } finally {
      span.end();
    }
  });
}
|
||||
|
||||
/**
 * Get the anonymized content of the file.
 *
 * Without `config.STREAMER_ENTRYPOINT` the content is fetched locally and
 * piped through the anonymizer of the repository. Otherwise the request is
 * delegated to the streamer service.
 *
 * @returns a stream of the anonymized content
 */
async anonymizedContent() {
  const span = trace
    .getTracer("ano-file")
    .startSpan("AnonymizedFile.anonymizedContent");
  span.setAttribute("anonymizedPath", this.anonymizedPath);
  try {
    const anonymizer = this.repository.generateAnonymizeTransformer(
      this.anonymizedPath
    );
    if (!config.STREAMER_ENTRYPOINT) {
      // collect the content locally
      const content = await this.content();
      return content.pipe(anonymizer).on("close", () => {
        span.end();
      });
    }

    // use the streamer service
    const token = await this.repository.getToken();
    // sha() resolves the original path, it has to run before `filePath` is
    // read: the getter throws for a masked path that is not resolved yet
    const sha = await this.sha();
    // NOTE(review): path.join is applied to a URL here; confirm that the
    // entrypoint is still parsed as expected by got.
    return got
      .stream(join(config.STREAMER_ENTRYPOINT, "api"), {
        method: "POST",
        json: {
          token,
          repoFullName: this.repository.model.source.repositoryName,
          commit: this.repository.model.source.commit,
          branch: this.repository.model.source.branch,
          repoId: this.repository.repoId,
          filePath: this.filePath,
          sha,
          anonymizerOptions: anonymizer.opt,
        },
      })
      .on("close", () => {
        span.end();
      });
  } catch (error) {
    // no stream was returned: nothing else will end the span
    span.recordException(error as Error);
    span.end();
    throw error;
  }
}
|
||||
|
||||
/**
 * Path of the file in the original repository.
 *
 * The resolved original path is returned when it is known. Otherwise the
 * anonymized path is returned as-is, provided that it does not contain the
 * anonymization mask.
 *
 * @throws AnonymousError `path_not_defined` (HTTP 400) when the path is
 * masked and has not been resolved yet
 */
get filePath() {
  if (this._originalPath) {
    return this._originalPath;
  }
  const isMasked = this.anonymizedPath.includes(config.ANONYMIZATION_MASK);
  if (isMasked) {
    throw new AnonymousError("path_not_defined", {
      object: this,
      httpStatus: 400,
    });
  }
  return this.anonymizedPath;
}
|
||||
|
||||
/**
 * Send the anonymized content of the file as the HTTP response.
 *
 * With `config.STREAMER_ENTRYPOINT` the response is piped from the streamer
 * service. Otherwise the content is fetched locally, piped through the
 * anonymizer and then to the response.
 *
 * The promise resolves when the response is closed, or once an error raised
 * while preparing the streams has been handed to `handleError`. It rejects
 * when the local content stream or the response emits an error.
 *
 * @param res the express response to write to
 */
async send(res: Response): Promise<void> {
  const anonymizer = this.repository.generateAnonymizeTransformer(
    this.anonymizedPath
  );
  return trace
    .getTracer("ano-file")
    .startActiveSpan("AnonymizedFile.send", async (span) => {
      span.setAttribute("repoId", this.repository.repoId);
      span.setAttribute("anonymizedPath", this.anonymizedPath);
      return new Promise<void>(async (resolve, reject) => {
        try {
          if (config.STREAMER_ENTRYPOINT) {
            // use the streamer service
            const token = await this.repository.getToken();
            // sha() resolves the original path, it has to run before
            // `filePath` is read: the getter throws for a masked path that
            // is not resolved yet
            const sha = await this.sha();
            got
              .stream(join(config.STREAMER_ENTRYPOINT, "api"), {
                method: "POST",
                json: {
                  token,
                  repoFullName: this.repository.model.source.repositoryName,
                  commit: this.repository.model.source.commit,
                  branch: this.repository.model.source.branch,
                  repoId: this.repository.repoId,
                  filePath: this.filePath,
                  sha,
                  anonymizerOptions: anonymizer.opt,
                },
              })
              .on("error", () => {
                handleError(
                  new AnonymousError("file_not_found", {
                    object: this,
                    httpStatus: 404,
                  }),
                  res
                );
              })
              .pipe(res)
              .on("close", () => {
                span.end();
                resolve();
              });
            return;
          }

          const mime = lookup(this.anonymizedPath);
          if (mime && this.extension() != "ts") {
            res.contentType(mime);
          } else if (isTextFile(this.anonymizedPath)) {
            res.contentType("text/plain");
          }
          res.header("Accept-Ranges", "none");
          anonymizer.once("transform", (data) => {
            if (!mime && data.isText) {
              res.contentType("text/plain");
            }
            if (!data.wasAnonimized && this.fileSize) {
              // the text files may be anonymized and therefore the size may be different
              res.header("Content-Length", this.fileSize.toString());
            }
          });
          const content = await this.content();
          function handleStreamError(error: Error) {
            if (!content.closed && !content.destroyed) {
              content.destroy();
            }
            span.recordException(error);
            span.end();
            reject(error);
            // handleError(error, res);
          }
          content
            .on("error", handleStreamError)
            .pipe(anonymizer)
            .pipe(res)
            .on("error", handleStreamError)
            .on("close", () => {
              if (!content.closed && !content.destroyed) {
                content.destroy();
              }
              span.end();
              resolve();
            });
        } catch (error) {
          try {
            handleError(error, res);
          } finally {
            // no stream handler is registered when this branch is reached:
            // end the span and settle the promise here, otherwise the caller
            // waits forever
            span.end();
            resolve();
          }
        }
      });
    });
}
|
||||
}
|
||||
53
src/core/AnonymousError.ts
Normal file
53
src/core/AnonymousError.ts
Normal file
@@ -0,0 +1,53 @@
|
||||
import { CustomError } from "ts-custom-error";
|
||||
import AnonymizedFile from "./AnonymizedFile";
|
||||
import Repository from "./Repository";
|
||||
import GitHubBase from "./source/GitHubBase";
|
||||
import { GitHubRepository } from "./source/GitHubRepository";
|
||||
import User from "./User";
|
||||
|
||||
/**
 * Error raised by the application. In addition to the message, it can carry
 * the object the error relates to, an HTTP status and the error that caused
 * it.
 */
export default class AnonymousError extends CustomError {
  /** Object the error relates to (`opt.object`) */
  value?: any;
  /** HTTP status associated with the error */
  httpStatus?: number;
  /** Error that caused this error */
  cause?: Error;

  /**
   * @param message the error message
   * @param opt.httpStatus the HTTP status associated with the error
   * @param opt.cause the error that caused this error
   * @param opt.object the object the error relates to
   */
  constructor(
    message: string,
    opt?: {
      httpStatus?: number;
      cause?: Error;
      object?: any;
    }
  ) {
    super(message);
    this.value = opt?.object;
    this.httpStatus = opt?.httpStatus;
    this.cause = opt?.cause;
  }

  /**
   * Render the error as `message: detail`, followed by the cause and its
   * stack when a cause is defined. The detail is a short identifier for the
   * known object types and the JSON form of the object otherwise.
   */
  toString(): string {
    let detail: string | null | undefined = null;
    if (this.value instanceof Repository) {
      detail = this.value.repoId;
    } else if (this.value instanceof AnonymizedFile) {
      detail = `/r/${this.value.repository.repoId}/${this.value.anonymizedPath}`;
    } else if (this.value instanceof GitHubRepository) {
      detail = `${this.value.fullName}`;
    } else if (this.value instanceof User) {
      detail = `${this.value.username}`;
    } else if (this.value instanceof GitHubBase) {
      detail = `GHDownload ${this.value.data.repoId}`;
    } else if (this.value) {
      // Only serialize unknown objects, and never let the serialization
      // (e.g. of a circular structure) break the rendering of the error.
      try {
        detail = JSON.stringify(this.value);
      } catch (error) {
        detail = String(this.value);
      }
    }

    let out = this.message;
    if (detail) {
      out += `: ${detail}`;
    }
    if (this.cause) {
      out += `\n\tCause by ${this.cause}\n${this.cause.stack}`;
    }
    return out;
  }
}
|
||||
138
src/core/Conference.ts
Normal file
138
src/core/Conference.ts
Normal file
@@ -0,0 +1,138 @@
|
||||
import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model";
|
||||
import { IConferenceDocument } from "./model/conference/conferences.types";
|
||||
import Repository from "./Repository";
|
||||
import { ConferenceStatus } from "./types";
|
||||
|
||||
/**
 * Wrapper around a conference document that exposes its properties and the
 * operations that change its state.
 */
export default class Conference {
  private _data: IConferenceDocument;
  // Stays undefined until the repositories are loaded. An empty array cannot
  // be used as the "not loaded" marker because it is truthy.
  private _repositories?: Repository[];

  constructor(data: IConferenceDocument) {
    this._data = data;
  }

  /**
   * Update the status of the conference and save it
   * @param status the new status
   * @param errorMessage a potential error message to display (not stored by
   * this method)
   */
  async updateStatus(status: ConferenceStatus, errorMessage?: string) {
    this._data.status = status;
    await this._data.save();
    return;
  }

  /**
   * Check if the conference is expired, i.e. if its end date is in the past
   */
  isExpired() {
    return this._data.endDate < new Date();
  }

  /**
   * Expire the conference and all its repositories
   */
  async expire() {
    await this.updateStatus("expired");
    const repositories = await this.repositories();
    await Promise.all(repositories.map((repo) => repo.expire()));
  }

  /**
   * Remove the conference and all its repositories
   */
  async remove() {
    await this.updateStatus("removed");
    const repositories = await this.repositories();
    await Promise.all(repositories.map((repo) => repo.remove()));
  }

  /**
   * Returns the list of repositories of this conference that have no remove
   * date. The list is loaded on the first call and cached afterwards.
   *
   * @returns the list of repositories of this conference
   */
  async repositories(): Promise<Repository[]> {
    if (this._repositories !== undefined) return this._repositories;
    const repoIds = this._data.repositories
      .filter((r) => !r.removeDate)
      .map((r) => r.id)
      .filter((f) => f);
    const documents = await AnonymizedRepositoryModel.find({
      _id: { $in: repoIds },
    });
    this._repositories = documents.map((r) => new Repository(r));
    return this._repositories;
  }

  get ownerIDs() {
    return this._data?.owners;
  }

  get quota() {
    return this._data.plan.quota;
  }

  get status() {
    return this._data.status;
  }

  get conferenceID() {
    return this._data.conferenceID;
  }

  get name() {
    return this._data.name;
  }

  get startDate() {
    return this._data.startDate;
  }

  get endDate() {
    return this._data.endDate;
  }

  get url() {
    return this._data.url;
  }

  get options() {
    return this._data.options;
  }

  /**
   * Serialize the conference, with the price accumulated so far and the
   * number of repositories that have no remove date.
   *
   * @param opt currently not read by this method
   */
  toJSON(opt?: { billing: boolean }): any {
    const msPerDay = 1000 * 60 * 60 * 24;
    // the plan price is divided by 30 and applied to durations in days
    const pricePerDayPerRepo = this._data.plan.pricePerRepository / 30;
    let price = 0;
    // nothing is billed after the end of the conference
    const today =
      new Date() > this._data.endDate ? this._data.endDate : new Date();
    this._data.repositories.forEach((r) => {
      const removeDate =
        r.removeDate && r.removeDate < today ? r.removeDate : today;
      const billedMs = Math.max(removeDate.getTime() - r.addDate.getTime(), 0);
      price += (billedMs / msPerDay) * pricePerDayPerRepo;
    });
    return {
      conferenceID: this._data.conferenceID,
      name: this._data.name,
      url: this._data.url,
      startDate: this._data.startDate,
      endDate: this._data.endDate,
      status: this._data.status,
      billing: this._data.billing,
      options: this._data.options,
      plan: this._data.plan,
      price,
      nbRepositories: this._data.repositories.filter((r) => !r.removeDate)
        .length,
    };
  }
}
|
||||
58
src/core/GitHubUtils.ts
Normal file
58
src/core/GitHubUtils.ts
Normal file
@@ -0,0 +1,58 @@
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import { Octokit } from "@octokit/rest";
|
||||
|
||||
import Repository from "./Repository";
|
||||
import UserModel from "./model/users/users.model";
|
||||
import config from "../config";
|
||||
|
||||
/**
 * Create an Octokit client that is authenticated with the given token and
 * that uses the global `fetch` for its requests.
 *
 * @param token the GitHub token
 */
export function octokit(token: string) {
  const request = { fetch };
  return new Octokit({ auth: token, request });
}
|
||||
|
||||
/**
 * Check whether the token can be used to get the authenticated user.
 *
 * @param token the GitHub token to check
 * @returns true when the request succeeds, false when it fails for any reason
 */
export async function checkToken(token: string) {
  const client = octokit(token);
  let valid = true;
  try {
    await client.users.getAuthenticated();
  } catch (error) {
    valid = false;
  }
  return valid;
}
|
||||
|
||||
/**
 * Find a GitHub token that can be used for the repository.
 *
 * The candidates are tried in this order: the token stored in the source of
 * the repository, the GitHub token of the owner (loaded from the database
 * when it is not in memory), and finally `config.GITHUB_TOKEN`. The first two
 * are only returned when `checkToken` accepts them.
 *
 * @param repository the repository that needs a token
 */
export async function getToken(repository: Repository) {
  const span = trace.getTracer("ano-file").startSpan("GHUtils.getToken");
  span.setAttribute("repoId", repository.repoId);
  try {
    // 1. token stored with the repository
    const sourceToken = repository.model.source.accessToken;
    if (sourceToken && (await checkToken(sourceToken))) {
      return sourceToken;
    }

    // 2. token of the owner, fetched from the database if needed
    const ownerModel = repository.owner.model;
    if (!ownerModel.accessTokens?.github) {
      const stored = await UserModel.findById(repository.owner.id, {
        accessTokens: 1,
      });
      const accessTokens = stored?.accessTokens;
      if (accessTokens) {
        ownerModel.accessTokens = accessTokens;
      }
    }
    const ownerToken = ownerModel.accessTokens?.github;
    if (ownerToken && (await checkToken(ownerToken))) {
      return ownerToken;
    }

    // 3. token of the application
    return config.GITHUB_TOKEN;
  } finally {
    span.end();
  }
}
|
||||
329
src/core/PullRequest.ts
Normal file
329
src/core/PullRequest.ts
Normal file
@@ -0,0 +1,329 @@
|
||||
import { RepositoryStatus } from "./types";
|
||||
import User from "./User";
|
||||
import UserModel from "./model/users/users.model";
|
||||
import Conference from "./Conference";
|
||||
import ConferenceModel from "./model/conference/conferences.model";
|
||||
import AnonymousError from "./AnonymousError";
|
||||
import { IAnonymizedPullRequestDocument } from "./model/anonymizedPullRequests/anonymizedPullRequests.types";
|
||||
import config from "../config";
|
||||
import got from "got";
|
||||
import { octokit } from "./GitHubUtils";
|
||||
import { ContentAnonimizer } from "./anonymize-utils";
|
||||
|
||||
export default class PullRequest {
|
||||
private _model: IAnonymizedPullRequestDocument;
|
||||
owner: User;
|
||||
|
||||
/**
 * @param data the document of the anonymized pull request
 */
constructor(data: IAnonymizedPullRequestDocument) {
  this._model = data;
  // the owner is only known by its id, flag the model as not new
  const ownerModel = new UserModel({ _id: data.owner });
  this.owner = new User(ownerModel);
  this.owner.model.isNew = false;
}
|
||||
|
||||
/**
 * Get the GitHub token to use for the pull request.
 *
 * The GitHub token of the owner is preferred (the owner is loaded from the
 * database when the token is not in memory) and is copied to the source of
 * the pull request when it differs. The token stored in the source is used
 * next, and `config.GITHUB_TOKEN` is the fallback.
 */
async getToken() {
  let owner = this.owner.model;
  // `accessTokens` may be missing on a model that only contains the id
  if (owner && !owner.accessTokens?.github) {
    const temp = await UserModel.findById(owner._id);
    if (temp) {
      owner = temp;
    }
  }
  const ownerToken = owner?.accessTokens?.github;
  if (ownerToken) {
    if (ownerToken != this._model.source.accessToken) {
      this._model.source.accessToken = ownerToken;
    }
    return ownerToken;
  }
  if (this._model.source.accessToken) {
    return this._model.source.accessToken;
  }
  return config.GITHUB_TOKEN;
}
|
||||
|
||||
/**
 * Download the pull request (information, comments and diff) from GitHub and
 * store it in the model. The model is not saved by this method.
 */
async download() {
  console.debug(
    "[INFO] Downloading pull request",
    this._model.source.pullRequestId
  );
  const client = octokit(await this.getToken());

  const [owner, repo] = this._model.source.repositoryFullName.split("/");
  const pull_number = this._model.source.pullRequestId;

  // the three requests are independent, run them in parallel
  const infoRequest = client.rest.pulls.get({ owner, repo, pull_number });
  const commentsRequest = client.paginate(
    "GET /repos/{owner}/{repo}/issues/{issue_number}/comments",
    {
      owner: owner,
      repo: repo,
      issue_number: pull_number,
      per_page: 100,
    }
  );
  const diffRequest = got(
    `https://github.com/${owner}/${repo}/pull/${pull_number}.diff`
  );
  const [prInfo, comments, diff] = await Promise.all([
    infoRequest,
    commentsRequest,
    diffRequest,
  ]);

  const info = prInfo.data;
  const anonymizedComments = comments.map((comment) => ({
    body: comment.body || "",
    creationDate: new Date(comment.created_at),
    updatedDate: new Date(comment.updated_at),
    author: comment.user?.login || "",
  }));
  this._model.pullRequest = {
    diff: diff.body,
    title: info.title,
    body: info.body || "",
    creationDate: new Date(info.created_at),
    updatedDate: new Date(info.updated_at),
    draft: info.draft,
    merged: info.merged,
    mergedDate: info.merged_at ? new Date(info.merged_at) : undefined,
    state: info.state,
    baseRepositoryFullName: info.base.repo.full_name,
    headRepositoryFullName: info.head.repo?.full_name,
    comments: anonymizedComments,
  };
}
|
||||
|
||||
/**
 * Check the status of the pullRequest.
 *
 * A ready pull request whose expiration date is reached is expired first.
 *
 * @throws AnonymousError `pullRequest_expired` (HTTP 410) when the pull
 * request is expired, expiring, removing or removed
 * @throws AnonymousError `pullRequest_not_ready` when the pull request is
 * being prepared, or has been downloading for less than five minutes
 */
check() {
  if (
    this._model.options.expirationMode !== "never" &&
    this.status == "ready" &&
    this._model.options.expirationDate
  ) {
    if (this._model.options.expirationDate <= new Date()) {
      // expire() changes the status before its first await, the check below
      // therefore sees the new status. The promise is not awaited because
      // this method is synchronous, but a failure must not become an
      // unhandled rejection.
      this.expire().catch((error) => {
        console.error(
          "[ERROR] Unable to expire the pull request",
          this._model.source.pullRequestId,
          error
        );
      });
    }
  }
  if (
    this.status == "expired" ||
    this.status == "expiring" ||
    this.status == "removing" ||
    this.status == "removed"
  ) {
    throw new AnonymousError("pullRequest_expired", {
      object: this,
      httpStatus: 410,
    });
  }
  const fiveMinuteAgo = new Date();
  fiveMinuteAgo.setMinutes(fiveMinuteAgo.getMinutes() - 5);

  if (
    this.status == "preparing" ||
    (this.status == "download" && this._model.statusDate > fiveMinuteAgo)
  ) {
    throw new AnonymousError("pullRequest_not_ready", {
      object: this,
    });
  }
}
|
||||
|
||||
/**
 * Download the pullRequest again when it is forced, or when the update
 * option is enabled and the last anonymization is more than one day old.
 *
 * @param opt.force download the pull request regardless of its age
 * @returns void
 */
async updateIfNeeded(opt?: { force: boolean }): Promise<void> {
  const yesterday = new Date();
  yesterday.setDate(yesterday.getDate() - 1);

  const isOutdated =
    this._model.options.update && this._model.anonymizeDate < yesterday;
  if (!(opt?.force || isOutdated)) {
    return;
  }
  await this.updateStatus(RepositoryStatus.DOWNLOAD);
  await this.download();
  this._model.anonymizeDate = new Date();
  await this.updateStatus(RepositoryStatus.READY);
  await this._model.save();
}
|
||||
/**
 * Download the state that the pullRequest requires to work.
 * Nothing is done when the pull request is already ready.
 *
 * @returns void
 */
async anonymize() {
  if (this.status !== RepositoryStatus.READY) {
    await this.updateStatus(RepositoryStatus.PREPARING);
    await this.updateIfNeeded({ force: true });
    await this.updateStatus(RepositoryStatus.READY);
  }
  return;
}
|
||||
|
||||
/**
 * Set the last view to now, increment the view count and save the model
 */
async countView() {
  const previousViews = this._model.pageView || 0;
  this._model.lastView = new Date();
  this._model.pageView = previousViews + 1;
  await this._model.save();
}
|
||||
|
||||
/**
 * Update the status of the pullRequest and save the model.
 * The status date is set to now.
 *
 * @param status the new status
 * @param statusMessage the message stored with the status; the stored
 * message is cleared when it is omitted
 */
async updateStatus(status: RepositoryStatus, statusMessage?: string) {
  const model = this._model;
  model.status = status;
  model.statusDate = new Date();
  model.statusMessage = statusMessage;
  await model.save();
}
|
||||
|
||||
/**
 * Expire the pullRequest: the status goes through EXPIRING while the stored
 * state is reset, then becomes EXPIRED
 */
async expire(): Promise<void> {
  await this.updateStatus(RepositoryStatus.EXPIRING);
  await this.resetSate();
  await this.updateStatus(RepositoryStatus.EXPIRED);
}
|
||||
|
||||
/**
 * Remove the pullRequest: the status goes through REMOVING while the stored
 * state is reset, then becomes REMOVED
 */
async remove(): Promise<void> {
  await this.updateStatus(RepositoryStatus.REMOVING);
  await this.resetSate();
  await this.updateStatus(RepositoryStatus.REMOVED);
}
|
||||
|
||||
/**
 * Reset/delete the state of the pullRequest: the stored content of the pull
 * request is blanked and the model is saved.
 *
 * @param status when given, the new status of the pull request
 * @param statusMessage when given, the new status message
 */
async resetSate(status?: RepositoryStatus, statusMessage?: string) {
  if (status) {
    this._model.status = status;
  }
  if (statusMessage) {
    this._model.statusMessage = statusMessage;
  }
  // remove cache
  Object.assign(this._model.pullRequest, {
    comments: [],
    body: "",
    title: "",
    diff: "",
    baseRepositoryFullName: "",
    headRepositoryFullName: "",
    merged: false,
    mergedDate: undefined,
    state: "closed",
    draft: false,
  });
  await this._model.save();
}
|
||||
|
||||
/**
 * Returns the conference of the pullRequest
 *
 * @returns the conference, or null when the pull request has no conference
 * or when the conference is not found
 */
async conference(): Promise<Conference | null> {
  const conferenceID = this._model.conference;
  if (!conferenceID) {
    return null;
  }
  const found = await ConferenceModel.findOne({ conferenceID });
  return found ? new Conference(found) : null;
}
|
||||
|
||||
/**
 * Build the anonymized view of the pull request. The title, body, comments,
 * diff, origin and dates are only included when the matching option is
 * enabled.
 */
content() {
  const options = this.options;
  const pullRequest = this._model.pullRequest;
  const anonymizer = new ContentAnonimizer({
    ...options,
    repoId: this.pullRequestId,
  });

  // always exposed
  const output: any = {
    anonymizeDate: this._model.anonymizeDate,
    merged: pullRequest.merged,
    mergedDate: pullRequest.mergedDate,
    state: pullRequest.state,
    draft: pullRequest.draft,
  };

  if (options.title) {
    output.title = anonymizer.anonymize(pullRequest.title);
  }
  if (options.body) {
    output.body = anonymizer.anonymize(pullRequest.body);
  }
  if (options.comments) {
    output.comments = pullRequest.comments?.map((comment) => {
      const anonymized: any = {};
      // the body of the comments follows the `body` option
      if (options.body) {
        anonymized.body = anonymizer.anonymize(comment.body);
      }
      if (options.username) {
        anonymized.author = anonymizer.anonymize(comment.author);
      }
      if (options.date) {
        anonymized.updatedDate = comment.updatedDate;
        anonymized.creationDate = comment.creationDate;
      }
      return anonymized;
    });
  }
  if (options.diff) {
    output.diff = anonymizer.anonymize(pullRequest.diff);
  }
  if (options.origin) {
    output.baseRepositoryFullName = pullRequest.baseRepositoryFullName;
  }
  if (options.date) {
    output.updatedDate = pullRequest.updatedDate;
    output.creationDate = pullRequest.creationDate;
  }
  return output;
}
|
||||
|
||||
/***** Getters ********/
|
||||
|
||||
/** Id of the anonymized pull request */
get pullRequestId() {
  const { pullRequestId } = this._model;
  return pullRequestId;
}

/** Options of the anonymized pull request */
get options() {
  const { options } = this._model;
  return options;
}

/** Source of the pull request */
get source() {
  const { source } = this._model;
  return source;
}

/** The underlying document */
get model() {
  return this._model;
}

/** Current status of the pull request */
get status() {
  const { status } = this._model;
  return status;
}
|
||||
|
||||
/**
 * Serialize the pull request. Only the pull request id and the repository
 * name of the source are included.
 */
toJSON() {
  const model = this._model;
  const { state, merged, mergedDate } = model.pullRequest;
  const source = {
    pullRequestId: model.source.pullRequestId,
    repositoryFullName: model.source.repositoryFullName,
  };
  return {
    pullRequestId: model.pullRequestId,
    options: model.options,
    conference: model.conference,
    anonymizeDate: model.anonymizeDate,
    status: model.status,
    state,
    merged,
    mergedDate,
    statusMessage: model.statusMessage,
    source,
    pullRequest: model.pullRequest,
    lastView: model.lastView,
    pageView: model.pageView,
  };
}
|
||||
}
|
||||
530
src/core/Repository.ts
Normal file
530
src/core/Repository.ts
Normal file
@@ -0,0 +1,530 @@
|
||||
import storage from "./storage";
|
||||
import { RepositoryStatus, Tree, TreeElement, TreeFile } from "./types";
|
||||
import { Readable } from "stream";
|
||||
import User from "./User";
|
||||
import GitHubStream from "./source/GitHubStream";
|
||||
import GitHubDownload from "./source/GitHubDownload";
|
||||
import Zip from "./source/Zip";
|
||||
import { anonymizePath } from "./anonymize-utils";
|
||||
import UserModel from "./model/users/users.model";
|
||||
import { IAnonymizedRepositoryDocument } from "./model/anonymizedRepositories/anonymizedRepositories.types";
|
||||
import { AnonymizeTransformer } from "./anonymize-utils";
|
||||
import GitHubBase from "./source/GitHubBase";
|
||||
import Conference from "./Conference";
|
||||
import ConferenceModel from "./model/conference/conferences.model";
|
||||
import AnonymousError from "./AnonymousError";
|
||||
import { downloadQueue } from "../queue";
|
||||
import { isConnected } from "../server/database";
|
||||
import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model";
|
||||
import { GitHubRepository } from "./source/GitHubRepository";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import { getToken } from "./GitHubUtils";
|
||||
|
||||
/**
 * Anonymize the names of all the entries of a file tree.
 *
 * @param tree the tree (or file) to anonymize
 * @param terms the terms passed to `anonymizePath` for every entry name
 * @param opt.includeSha keep the files as they are (sha included) instead of
 * reducing them to their size
 * @returns a new tree with anonymized names; files are returned as-is when
 * `includeSha` is set
 */
function anonymizeTreeRecursive(
  tree: TreeElement,
  terms: string[],
  opt: {
    /** Include the file sha in the response */
    includeSha: boolean;
  } = {
    includeSha: false,
  }
): TreeElement {
  // A file has a `sha` and a `size` that are not objects. A folder can have
  // entries that are named "sha" or "size", they are objects and must not
  // make the folder look like a file.
  const isFile =
    typeof tree.size !== "object" &&
    typeof tree.sha !== "object" &&
    tree.sha !== undefined;
  if (isFile) {
    if (opt?.includeSha) return tree as TreeFile;
    return { size: tree.size } as TreeFile;
  }
  const output: Tree = {};
  Object.getOwnPropertyNames(tree).forEach((file) => {
    const anonymizedPath = anonymizePath(file, terms);
    output[anonymizedPath] = anonymizeTreeRecursive(
      (tree as Tree)[file],
      terms,
      opt
    );
  });
  return output;
}
|
||||
|
||||
export default class Repository {
|
||||
private _model: IAnonymizedRepositoryDocument;
|
||||
owner: User;
|
||||
|
||||
/**
 * @param data the document of the anonymized repository
 */
constructor(data: IAnonymizedRepositoryDocument) {
  this._model = data;
  // the owner is only known by its id, flag the model as not new
  this.owner = new User(new UserModel({ _id: data.owner }));
  this.owner.model.isNew = false;
}
|
||||
|
||||
private checkedToken: boolean = false;
|
||||
|
||||
/**
 * Get the GitHub token of the repository.
 *
 * The token is resolved on the first call and is stored in the source of the
 * repository (the model is saved) when it differs from the stored one. The
 * following calls return the stored token.
 */
async getToken() {
  if (this.checkedToken) {
    return this._model.source.accessToken as string;
  }
  const previousToken = this._model.source.accessToken;
  const resolvedToken = await getToken(this);
  const hasChanged = previousToken != resolvedToken;
  if (hasChanged) {
    this._model.source.accessToken = resolvedToken;
    await this._model.save();
  }
  this.checkedToken = true;
  return resolvedToken;
}
|
||||
|
||||
/**
 * Build the source handler matching `model.source.type`.
 *
 * A new handler instance is created on every access.
 *
 * @returns a `GitHubDownload`, `GitHubStream` or `Zip` instance
 * @throws AnonymousError `unsupported_source` (HTTP 400) for any other type
 */
get source() {
  const ghRepo = new GitHubRepository({
    name: this.model.source.repositoryName,
  });
  // Options shared by the two GitHub-backed sources.
  const githubOptions = () => ({
    repoId: this.repoId,
    commit: this.model.source.commit || "HEAD",
    organization: ghRepo.owner,
    repoName: ghRepo.repo,
    getToken: () => this.getToken(),
  });

  const sourceType = this.model.source.type;
  if (sourceType === "GitHubDownload") {
    return new GitHubDownload(githubOptions());
  }
  if (sourceType === "GitHubStream") {
    return new GitHubStream(githubOptions());
  }
  if (sourceType === "Zip") {
    return new Zip(this.model.source, this.repoId);
  }
  throw new AnonymousError("unsupported_source", {
    object: this,
    httpStatus: 400,
  });
}
|
||||
|
||||
/**
 * Get the anonymized file tree.
 *
 * @param opt `force` refreshes the list of files; `includeSha` keeps the sha
 *            of every file in the result
 * @returns The anonymized file tree
 */
async anonymizedFiles(
  opt: {
    /** Force to refresh the file tree */
    force?: boolean;
    /** Include the file sha in the response */
    includeSha: boolean;
  } = { force: false, includeSha: false }
): Promise<Tree> {
  const terms = this._model.options.terms || [];
  const originalTree = await this.files(opt);
  return anonymizeTreeRecursive(originalTree, terms, opt) as Tree;
}
|
||||
|
||||
/**
 * Get the original (non-anonymized) file tree.
 *
 * Unless `opt.force` is set, the tree stored in the model is used: it is
 * loaded from the database when the in-memory model does not hold it, and
 * returned when it is not empty. Otherwise the tree is fetched from the
 * source, stored in the model, and the size is recomputed.
 *
 * @param opt force to get an updated list of files
 * @returns The file tree
 * @throws AnonymousError `repository_not_found` when the document no longer
 *         exists in the database
 */
async files(opt: { force?: boolean } = { force: false }): Promise<Tree> {
  const span = trace.getTracer("ano-file").startSpan("Repository.files");
  span.setAttribute("repoId", this.repoId);
  try {
    if (!opt.force) {
      if (!this._model.originalFiles) {
        const stored = await AnonymizedRepositoryModel.findById(
          this._model._id,
          { originalFiles: 1 }
        );
        if (!stored) throw new AnonymousError("repository_not_found");
        this.model.originalFiles = stored.originalFiles;
      }
      const cached = this._model.originalFiles;
      if (cached && Object.getOwnPropertyNames(cached).length !== 0) {
        return cached;
      }
    }

    const fetched = await this.source.getFiles();
    this._model.originalFiles = fetched;
    this._model.size = { storage: 0, file: 0 };
    await this.computeSize();
    return fetched;
  } finally {
    span.end();
  }
}
|
||||
|
||||
/**
 * Check the status of the repository.
 *
 * A READY repository whose expiration mode is not "never" and whose
 * expiration date has passed is expired first.
 *
 * @throws AnonymousError `repository_expired` (HTTP 410) when the status is
 *         EXPIRED, EXPIRING, REMOVING or REMOVED
 * @throws AnonymousError `repository_not_ready` when the status is
 *         PREPARING, or DOWNLOAD with a status date newer than five minutes
 */
check() {
  const { expirationMode, expirationDate } = this._model.options;
  if (
    expirationMode !== "never" &&
    this.status == RepositoryStatus.READY &&
    expirationDate &&
    expirationDate <= new Date()
  ) {
    // expire() is intentionally not awaited (check() is synchronous): the
    // status is switched to EXPIRING before its first await, so the test
    // below already sees it. The rejection handler prevents a failed
    // save/cleanup from surfacing as an unhandled promise rejection.
    this.expire().catch((error) => {
      console.error(`[EXPIRE] unable to expire ${this._model.repoId}`, error);
    });
  }

  if (
    this.status == RepositoryStatus.EXPIRED ||
    this.status == RepositoryStatus.EXPIRING ||
    this.status == RepositoryStatus.REMOVING ||
    this.status == RepositoryStatus.REMOVED
  ) {
    throw new AnonymousError("repository_expired", {
      object: this,
      httpStatus: 410,
    });
  }

  const fiveMinuteAgo = new Date();
  fiveMinuteAgo.setMinutes(fiveMinuteAgo.getMinutes() - 5);

  // A DOWNLOAD status older than five minutes is not reported as "not ready".
  if (
    this.status == RepositoryStatus.PREPARING ||
    (this.status == RepositoryStatus.DOWNLOAD &&
      this._model.statusDate > fiveMinuteAgo)
  ) {
    throw new AnonymousError("repository_not_ready", {
      object: this,
    });
  }
}
|
||||
|
||||
/**
 * Compress and anonymize the repository.
 *
 * Every archived file goes through the transformer produced by
 * `generateAnonymizeTransformer`.
 *
 * @returns A stream of the anonymized repository compressed as a zip
 */
zip(): Promise<Readable> {
  const fileTransformer = (filename: string) =>
    this.generateAnonymizeTransformer(filename);
  return storage.archive(this.repoId, "", { format: "zip", fileTransformer });
}
|
||||
|
||||
/**
 * Create the stream transformer that anonymizes the content of one file
 * using the options of this repository.
 *
 * @param filePath path of the file that will be piped through the transformer
 * @returns a new `AnonymizeTransformer`; the branch name defaults to "main"
 */
generateAnonymizeTransformer(filePath: string) {
  const { terms, image, link } = this.options;
  const { repositoryName, branch } = this.model.source;
  return new AnonymizeTransformer({
    filePath,
    terms,
    image,
    link,
    repoId: this.repoId,
    repoName: repositoryName,
    branchName: branch || "main",
  });
}
|
||||
|
||||
/**
 * Update the repository if a new commit exists on its branch.
 *
 * Nothing happens unless `opt.force` is set, or the `update` option is
 * enabled and the last view is older than one day. Only sources that are
 * instances of `GitHubBase` can be updated.
 *
 * When the head of the branch differs from the stored commit (or the
 * repository is not READY), the stored commit and commit date are refreshed,
 * the state is reset to PREPARING and a download job is queued.
 *
 * @param opt `force` bypasses the update option / last view test
 * @returns void
 * @throws AnonymousError `branch_not_found` when the branch is not listed
 *         by GitHub; the status is set to ERROR and the state is reset first
 */
async updateIfNeeded(opt?: { force: boolean }): Promise<void> {
  const span = trace
    .getTracer("ano-file")
    .startSpan("Repository.updateIfNeeded");
  span.setAttribute("repoId", this.repoId);
  try {
    const yesterday = new Date();
    yesterday.setDate(yesterday.getDate() - 1);
    const updateRequested =
      opt?.force ||
      (this._model.options.update && this._model.lastView < yesterday);
    if (!updateRequested) return;

    // Only GitHubBase can be updated for the moment
    if (!(this.source instanceof GitHubBase)) return;

    const token = await this.getToken();
    const ghRepo = new GitHubRepository({
      name: this.model.source.repositoryName,
    });
    const branches = await ghRepo.branches({
      force: true,
      accessToken: token,
    });
    const branchName = this.model.source.branch || "main";
    const newCommit = branches.find((b) => b.name == branchName)?.commit;

    if (
      this.model.source.commit == newCommit &&
      this.status == RepositoryStatus.READY
    ) {
      console.log(`[UPDATE] ${this._model.repoId} is up to date`);
      span.setAttribute("status", "up_to_date");
      return;
    }

    // Detect the missing branch before anything uses `newCommit`: the stored
    // commit must not be overwritten with `undefined`, and the commit lookup
    // must not be attempted without a commit.
    if (!newCommit) {
      console.error(
        `${branchName} for ${this.model.source.repositoryName} is not found`
      );
      await this.updateStatus(RepositoryStatus.ERROR, "branch_not_found");
      await this.resetSate();
      span.setAttribute("status", "branch_not_found");
      throw new AnonymousError("branch_not_found", {
        object: this,
      });
    }

    this._model.source.commit = newCommit;
    const commitInfo = await ghRepo.getCommitInfo(newCommit, {
      accessToken: token,
    });
    // Prefer the author date and fall back to the committer date.
    const commitDate =
      commitInfo.commit?.author?.date || commitInfo.commit?.committer?.date;
    if (commitDate) {
      this._model.source.commitDate = new Date(commitDate);
    }

    this._model.anonymizeDate = new Date();
    console.log(
      `[UPDATE] ${this._model.repoId} will be updated to ${newCommit}`
    );

    await this.resetSate(RepositoryStatus.PREPARING);
    await downloadQueue.add(this.repoId, this, {
      jobId: this.repoId,
      attempts: 3,
    });
  } finally {
    // Always close the span, including when an awaited call rejects.
    span.end();
  }
}
|
||||
/**
 * Download the state required for the repository to work.
 *
 * Does nothing when the repository is already READY. Otherwise the status
 * goes to PREPARING, the file tree is loaded through `files()`, and the
 * status is set to READY.
 *
 * @returns void
 */
async anonymize() {
  const span = trace.getTracer("ano-file").startSpan("Repository.anonymize");
  span.setAttribute("repoId", this.repoId);
  try {
    if (this.status === RepositoryStatus.READY) return;
    await this.updateStatus(RepositoryStatus.PREPARING);
    await this.files();
    await this.updateStatus(RepositoryStatus.READY);
  } finally {
    // Close the span even when one of the steps rejects.
    span.end();
  }
}
|
||||
|
||||
/**
 * Update the last view date and increment the view count.
 *
 * The model is saved only when the database is connected; otherwise the
 * in-memory model is returned.
 */
async countView() {
  const span = trace.getTracer("ano-file").startSpan("Repository.countView");
  span.setAttribute("repoId", this.repoId);
  try {
    const model = this._model;
    model.lastView = new Date();
    model.pageView = (model.pageView || 0) + 1;
    if (isConnected) {
      await model.save();
    } else {
      return this.model;
    }
  } finally {
    span.end();
  }
}
|
||||
|
||||
/**
 * Update the status of the repository.
 *
 * The status date is set to the current date. The model is saved only when
 * the database is connected.
 *
 * @param status the new status; a falsy value leaves the model untouched
 * @param statusMessage an optional message stored along with the status
 */
async updateStatus(status: RepositoryStatus, statusMessage?: string) {
  const span = trace
    .getTracer("ano-file")
    .startSpan("Repository.updateStatus");
  span.setAttribute("repoId", this.repoId);
  span.setAttribute("status", status);
  span.setAttribute("statusMessage", statusMessage || "");
  try {
    if (status) {
      const model = this._model;
      model.status = status;
      model.statusDate = new Date();
      model.statusMessage = statusMessage;
      if (isConnected) {
        await model.save();
        return;
      }
    }
    return this.model;
  } finally {
    span.end();
  }
}
|
||||
|
||||
/**
 * Expire the repository.
 *
 * The status goes to EXPIRING, the state is reset, then the status is set
 * to EXPIRED.
 */
async expire() {
  const span = trace.getTracer("ano-file").startSpan("Repository.expire");
  span.setAttribute("repoId", this.repoId);
  try {
    await this.updateStatus(RepositoryStatus.EXPIRING);
    await this.resetSate();
    await this.updateStatus(RepositoryStatus.EXPIRED);
  } finally {
    // Close the span even when one of the steps rejects.
    span.end();
  }
}
|
||||
|
||||
/**
 * Remove the repository.
 *
 * The status goes to REMOVING, the state is reset, then the status is set
 * to REMOVED.
 */
async remove() {
  const span = trace.getTracer("ano-file").startSpan("Repository.remove");
  span.setAttribute("repoId", this.repoId);
  try {
    await this.updateStatus(RepositoryStatus.REMOVING);
    await this.resetSate();
    await this.updateStatus(RepositoryStatus.REMOVED);
  } finally {
    // Close the span even when one of the steps rejects.
    span.end();
  }
}
|
||||
|
||||
/**
 * Reset/delete the state of the repository.
 *
 * Clears the stored size and file tree, optionally updates the status, and
 * removes the cached files.
 *
 * @param status when provided, the status to set before the cache is removed
 * @param statusMessage optional message stored with the status
 */
async resetSate(status?: RepositoryStatus, statusMessage?: string) {
  const span = trace.getTracer("ano-file").startSpan("Repository.resetState");
  span.setAttribute("repoId", this.repoId);
  try {
    // remove attribute
    this._model.size = { storage: 0, file: 0 };
    this._model.originalFiles = undefined;
    if (status) {
      await this.updateStatus(status, statusMessage);
    }
    // remove cache
    await this.removeCache();
    console.log(`[RESET] ${this._model.repoId} has been reset`);
  } finally {
    // Close the span even when one of the steps rejects.
    span.end();
  }
}
|
||||
|
||||
/**
 * Remove the cached files of the repository from the storage.
 *
 * Whether or not the removal succeeds, the model is flagged as reset
 * (`isReseted`) and saved.
 *
 * @returns the result of `storage.rm`
 */
async removeCache() {
  const span = trace
    .getTracer("ano-file")
    .startSpan("Repository.removeCache");
  span.setAttribute("repoId", this.repoId);
  try {
    // `return await` keeps the removal inside the try block, so the finally
    // clause (and the end of the span) runs once the removal has settled.
    return await storage.rm(this.repoId);
  } finally {
    try {
      this.model.isReseted = true;
      await this.model.save();
    } finally {
      span.end();
    }
  }
}
|
||||
|
||||
/**
 * Compute the size of the repository in terms of storage and number of files.
 *
 * Zeros are returned unless the repository is READY. A size already stored
 * in the model (non-zero file count) is returned as is; otherwise the file
 * tree is walked and the result is stored in the model and saved.
 *
 * @returns `storage`: the sum of the file sizes found in the tree;
 *          `file`: the number of files counted
 */
async computeSize(): Promise<{
  /**
   * Sum of the sizes of the files of the repository
   */
  storage: number;
  /**
   * The number of files
   */
  file: number;
}> {
  const span = trace
    .getTracer("ano-file")
    .startSpan("Repository.computeSize");
  span.setAttribute("repoId", this.repoId);
  try {
    if (this.status !== RepositoryStatus.READY)
      return { storage: 0, file: 0 };
    if (this._model.size.file) return this._model.size;

    // NOTE(review): an entry whose size is 0 fails the first test below and
    // is therefore not counted as a file — confirm this is intended.
    const recursiveCount = (
      files: Tree
    ): { storage: number; file: number } => {
      const out = { storage: 0, file: 0 };
      for (const name in files) {
        const file = files[name];
        if (file.size && parseInt(file.size.toString()) == file.size) {
          // numeric size: the entry is a file
          out.storage += file.size as number;
          out.file++;
        } else if (typeof file == "object") {
          // otherwise descend into the entry
          const r = recursiveCount(file as Tree);
          out.storage += r.storage;
          out.file += r.file;
        }
      }
      return out;
    };

    const files = await this.files();
    this._model.size = recursiveCount(files);
    await this._model.save();
    return this._model.size;
  } finally {
    span.end();
  }
}
|
||||
|
||||
/**
 * Returns the conference of the repository.
 *
 * @returns the conference whose `conferenceID` matches the one stored in
 *          the model, or null when none is stored or none is found
 */
async conference(): Promise<Conference | null> {
  const span = trace.getTracer("ano-file").startSpan("Repository.conference");
  span.setAttribute("repoId", this.repoId);
  try {
    const conferenceID = this._model.conference;
    if (!conferenceID) {
      return null;
    }
    const found = await ConferenceModel.findOne({ conferenceID });
    return found ? new Conference(found) : null;
  } finally {
    span.end();
  }
}
|
||||
|
||||
/***** Getters ********/
|
||||
|
||||
/** Identifier of the anonymized repository, as stored in the model. */
get repoId() {
  const { repoId } = this._model;
  return repoId;
}

/** Anonymization options stored in the model. */
get options() {
  const { options } = this._model;
  return options;
}

/** The underlying repository document. */
get model() {
  return this._model;
}

/** Current status stored in the model. */
get status() {
  const { status } = this._model;
  return status;
}

/** Size stored in the model; zeros while the repository is not READY. */
get size() {
  return this.status == RepositoryStatus.READY
    ? this._model.size
    : { storage: 0, file: 0 };
}
|
||||
|
||||
/**
 * Serializable view of the repository: identifier, options, conference,
 * dates, status, view statistics, size and a summary of the source
 * (full name, commit, branch and type).
 */
toJSON() {
  const model = this._model;
  const { repositoryName, commit, branch, type } = this.model.source;
  return {
    repoId: model.repoId,
    options: model.options,
    conference: model.conference,
    anonymizeDate: model.anonymizeDate,
    status: this.status,
    statusMessage: model.statusMessage,
    lastView: model.lastView,
    pageView: model.pageView,
    size: this.size,
    source: {
      fullName: repositoryName,
      commit,
      branch,
      type,
    },
  };
}
|
||||
}
|
||||
194
src/core/User.ts
Normal file
194
src/core/User.ts
Normal file
@@ -0,0 +1,194 @@
|
||||
import { trace } from "@opentelemetry/api";
|
||||
|
||||
import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model";
|
||||
import RepositoryModel from "./model/repositories/repositories.model";
|
||||
import { IUserDocument } from "./model/users/users.types";
|
||||
import Repository from "./Repository";
|
||||
import { GitHubRepository } from "./source/GitHubRepository";
|
||||
import PullRequest from "./PullRequest";
|
||||
import AnonymizedPullRequestModel from "./model/anonymizedPullRequests/anonymizedPullRequests.model";
|
||||
import { octokit } from "./GitHubUtils";
|
||||
|
||||
/**
 * Wrapper around a user document.
 */
export default class User {
  private _model: IUserDocument;

  constructor(model: IUserDocument) {
    this._model = model;
  }

  get id(): string {
    return this._model.id;
  }

  get username(): string {
    return this._model.username;
  }

  get isAdmin(): boolean {
    return !!this._model.isAdmin;
  }

  /** GitHub access token stored in the user document. */
  get accessToken(): string {
    return this._model.accessTokens.github;
  }

  get photo(): string | undefined {
    return this._model.photo;
  }

  get default() {
    return this._model.default;
  }

  set default(d) {
    this._model.default = d;
  }

  /**
   * Get the GitHub repositories of the user.
   *
   * When the user document holds no repository, or when `opt.force` is true,
   * the list is fetched from GitHub: repositories unknown to the database
   * are saved, and the ids of all of them are stored in the user document.
   * Otherwise the repositories referenced by the user document are loaded
   * from the database.
   *
   * @param opt options
   * @returns the list of github repositories
   */
  async getGitHubRepositories(opt?: {
    /**
     * Get the repository from GitHub
     */
    force: boolean;
  }): Promise<GitHubRepository[]> {
    const span = trace
      .getTracer("ano-file")
      .startSpan("User.getGitHubRepositories");
    span.setAttribute("username", this.username);
    try {
      const cachedIds = this._model.repositories;
      if (cachedIds && cachedIds.length != 0 && opt?.force !== true) {
        const stored = await RepositoryModel.find({ _id: { $in: cachedIds } });
        return stored.map((i) => new GitHubRepository(i));
      }

      // get the list of repo from github
      const oct = octokit(this.accessToken);
      const repositories = (
        await oct.paginate("GET /user/repos", {
          visibility: "all",
          sort: "pushed",
          per_page: 100,
        })
      ).map((r) => {
        return new RepositoryModel({
          externalId: "gh_" + r.id,
          name: r.full_name,
          url: r.html_url,
          size: r.size,
          defaultBranch: r.default_branch,
        });
      });
      const externalIds = repositories.map((repo) => repo.externalId);

      // find the repositories that are already in the database
      const known = new Set(
        (
          await RepositoryModel.find({
            externalId: { $in: externalIds },
          }).select("externalId")
        ).map((m) => m.externalId)
      );

      // save all the new repositories
      await Promise.all(
        repositories
          .filter((r) => !known.has(r.externalId))
          .map((r) => r.save())
      );

      // save only the id of the repositories in the user model
      this._model.repositories = (
        await RepositoryModel.find({
          externalId: { $in: externalIds },
        }).select("id")
      ).map((m) => m.id);

      // save the model
      await this._model.save();
      return repositories.map((r) => new GitHubRepository(r));
    } finally {
      // Close the span even when a GitHub or database call rejects.
      span.end();
    }
  }

  /**
   * Get the list of anonymized repositories owned by the user.
   *
   * Repositories that are ready, have an expiration mode other than "never"
   * and an expiration date in the past are expired before the list is
   * returned. The documents are loaded without their `originalFiles`.
   *
   * @returns the list of anonymized repositories
   */
  async getRepositories() {
    const span = trace.getTracer("ano-file").startSpan("User.getRepositories");
    span.setAttribute("username", this.username);
    try {
      const repositories = (
        await AnonymizedRepositoryModel.find(
          {
            owner: this.id,
          },
          {
            originalFiles: 0,
          }
        ).exec()
      ).map((d) => new Repository(d));

      await Promise.all(
        repositories
          .filter(
            (repo) =>
              repo.status == "ready" &&
              repo.options.expirationMode != "never" &&
              repo.options.expirationDate != null &&
              repo.options.expirationDate < new Date()
          )
          // expire the repository
          .map((repo) => repo.expire())
      );
      return repositories;
    } finally {
      span.end();
    }
  }

  /**
   * Get the list of anonymized pull requests owned by the user.
   *
   * Pull requests that are ready, have an expiration mode other than "never"
   * and an expiration date in the past are expired before the list is
   * returned.
   *
   * @returns the list of anonymized pull requests
   */
  async getPullRequests() {
    const span = trace.getTracer("ano-file").startSpan("User.getPullRequests");
    span.setAttribute("username", this.username);
    try {
      const pullRequests = (
        await AnonymizedPullRequestModel.find({
          owner: this.id,
        }).exec()
      ).map((d) => new PullRequest(d));

      await Promise.all(
        pullRequests
          .filter(
            (pullRequest) =>
              pullRequest.status == "ready" &&
              pullRequest.options.expirationMode != "never" &&
              pullRequest.options.expirationDate != null &&
              pullRequest.options.expirationDate < new Date()
          )
          // expire the pull request
          .map((pullRequest) => pullRequest.expire())
      );
      return pullRequests;
    } finally {
      span.end();
    }
  }

  get model() {
    return this._model;
  }

  toJSON() {
    return this._model.toJSON();
  }
}
|
||||
226
src/core/anonymize-utils.ts
Normal file
226
src/core/anonymize-utils.ts
Normal file
@@ -0,0 +1,226 @@
|
||||
import { basename } from "path";
|
||||
import { Transform, Readable } from "stream";
|
||||
import { isText } from "istextorbinary";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
|
||||
import config from "../config";
|
||||
|
||||
/**
 * Matches http(s), ftp and file URLs, optionally wrapped in angle brackets.
 * The pattern carries the global flag and is used with
 * `String.prototype.replace` in this file.
 */
const urlRegex =
  /<?\b((https?|ftp|file):\/\/)[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]\b\/?>?/g;
|
||||
|
||||
/**
 * Read a stream until its end and return its content decoded as UTF-8.
 *
 * @param stream the stream to consume
 * @returns a promise resolved with the concatenated content once the stream
 *          emits `end`, or rejected with the error emitted by the stream
 */
export function streamToString(stream: Readable): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const parts: Buffer[] = [];
    stream.on("data", (piece) => {
      parts.push(Buffer.from(piece));
    });
    stream.on("error", (err) => {
      reject(err);
    });
    stream.on("end", () => {
      resolve(Buffer.concat(parts).toString("utf8"));
    });
  });
}
|
||||
|
||||
/**
 * Tell whether a file should be handled as text.
 *
 * The file is text when its last extension (lower-cased) is listed in
 * `config.additionalExtensions`, or when `isText` recognizes the file name;
 * otherwise the result of `isText` on the file name and `content` is
 * returned as is.
 *
 * @param filePath path of the file; only its base name is inspected
 * @param content optional content handed to `isText`
 */
export function isTextFile(filePath: string, content?: Buffer) {
  const filename = basename(filePath);
  // Text after the last dot, or the whole name when there is no dot.
  const extension = filename
    .slice(filename.lastIndexOf(".") + 1)
    .toLowerCase();
  return (
    config.additionalExtensions.includes(extension) ||
    isText(filename) ||
    isText(filename, content)
  );
}
|
||||
|
||||
/**
 * Transform stream that anonymizes the chunks of a text file.
 *
 * Chunks of files that are not detected as text are forwarded untouched.
 * A `transform` event carrying `{ isText, wasAnonimized, chunk }` is emitted
 * for every chunk before it is pushed.
 *
 * NOTE(review): every chunk is anonymized independently, so a term or URL
 * split across two chunks is presumably not replaced — confirm.
 */
export class AnonymizeTransformer extends Transform {
  /** Result of the text detection; null while it is still undecided. */
  public isText: boolean | null = null;
  anonimizer: ContentAnonimizer;

  /**
   * @param opt the path of the file plus the `ContentAnonimizer` options
   */
  constructor(
    readonly opt: {
      filePath: string;
    } & ConstructorParameters<typeof ContentAnonimizer>[0]
  ) {
    super();
    this.isText = isTextFile(this.opt.filePath);
    this.anonimizer = new ContentAnonimizer(this.opt);
  }

  /** True once the anonymizer has replaced something. */
  get wasAnonimized() {
    return this.anonimizer.wasAnonymized;
  }

  /**
   * Anonymize one chunk and push it downstream.
   *
   * @param chunk the data to process
   * @param encoding unused
   * @param callback invoked once the chunk is pushed, or with the error
   *                 raised while processing it
   */
  _transform(
    chunk: Buffer,
    encoding: string,
    callback: (error?: Error | null) => void
  ) {
    trace
      .getTracer("ano-file")
      .startActiveSpan("AnonymizeTransformer.transform", (span) => {
        try {
          span.setAttribute("path", this.opt.filePath);
          if (this.isText === null) {
            // The file name was not conclusive: decide from the content.
            this.isText = isTextFile(this.opt.filePath, chunk);
          }
          if (this.isText) {
            const content = this.anonimizer.anonymize(chunk.toString());
            // The chunk is re-encoded only once something has been replaced.
            if (this.anonimizer.wasAnonymized) {
              chunk = Buffer.from(content);
            }
          }

          this.emit("transform", {
            isText: this.isText,
            wasAnonimized: this.wasAnonimized,
            chunk,
          });

          this.push(chunk);
        } catch (error) {
          // Report the failure to the stream instead of leaving it pending.
          callback(error instanceof Error ? error : new Error(String(error)));
          return;
        } finally {
          span.end();
        }
        callback();
      });
  }
}
|
||||
|
||||
/**
 * Anonymizes text: optionally removes markdown images and links, rewrites
 * links pointing to the original GitHub repository, and masks the
 * configured terms.
 */
export class ContentAnonimizer {
  /** Set to true as soon as one replacement has been made. */
  public wasAnonymized = false;

  /**
   * @param opt `image` / `link`: the images / links are removed only when
   *            the flag is exactly `false`; `terms`: the terms to mask;
   *            `repoName`, `branchName`, `repoId`: used to rewrite the links
   *            to the original repository
   */
  constructor(
    readonly opt: {
      image?: boolean;
      link?: boolean;
      terms?: string[];
      repoName?: string;
      branchName?: string;
      repoId?: string;
    }
  ) {}

  /** Escape the characters that have a special meaning in a RegExp source. */
  private static escapeRegExp(text: string): string {
    return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  }

  /** Replace markdown images by the anonymization mask. */
  private removeImage(content: string): string {
    if (this.opt.image !== false) {
      return content;
    }
    // remove image in markdown
    return content.replace(
      /!\[[^\]]*\]\((?<filename>.*?)(?=\"|\))(?<optionalpart>\".*\")?\)/g,
      () => {
        this.wasAnonymized = true;
        return config.ANONYMIZATION_MASK;
      }
    );
  }

  /** Replace every URL by the anonymization mask. */
  private removeLink(content: string): string {
    if (this.opt.link !== false) {
      return content;
    }
    // remove the links
    return content.replace(urlRegex, () => {
      this.wasAnonymized = true;
      return config.ANONYMIZATION_MASK;
    });
  }

  /**
   * Replace the links to the original repository (raw, blob, tree and root
   * URLs) by the URL of the anonymized repository.
   */
  private replaceGitHubSelfLinks(content: string): string {
    if (!this.opt.repoName || !this.opt.branchName) {
      return content;
    }
    // The names are matched literally: they may contain characters such as
    // "." or "+" that are special in a regular expression.
    const repoName = ContentAnonimizer.escapeRegExp(this.opt.repoName);
    const branchName = ContentAnonimizer.escapeRegExp(this.opt.branchName);

    const replaceCallback = () => {
      this.wasAnonymized = true;
      return `https://${config.APP_HOSTNAME}/r/${this.opt.repoId}`;
    };
    // Branch-specific URLs come first so that the repository root pattern
    // only handles what is left.
    const patterns = [
      `https://raw\\.githubusercontent\\.com/${repoName}/${branchName}\\b`,
      `https://github\\.com/${repoName}/blob/${branchName}\\b`,
      `https://github\\.com/${repoName}/tree/${branchName}\\b`,
      `https://github\\.com/${repoName}`,
    ];
    for (const pattern of patterns) {
      content = content.replace(new RegExp(pattern, "gi"), replaceCallback);
    }
    return content;
  }

  /**
   * Replace every term by `<mask>-<position of the term>`. A URL containing
   * a term is replaced as a whole.
   */
  private replaceTerms(content: string): string {
    const terms = this.opt.terms || [];
    for (let i = 0; i < terms.length; i++) {
      let term = terms[i];
      if (term.trim() == "") {
        continue;
      }
      const mask = config.ANONYMIZATION_MASK + "-" + (i + 1);
      try {
        // A term is used as a regular expression when it is a valid one.
        new RegExp(term, "gi");
      } catch {
        // escape regex characters
        term = term.replace(/[-[\]{}()*+?.,\\^$|#]/g, "\\$&");
      }
      // Built once per term. The URL test has no global flag so that
      // `test` keeps no state between two URLs.
      const termInUrl = new RegExp(`\\b${term}\\b`, "i");
      const termInText = new RegExp(`\\b${term}\\b`, "gi");

      // remove whole url if it contains the term
      content = content.replace(urlRegex, (match) => {
        if (termInUrl.test(match)) {
          this.wasAnonymized = true;
          return mask;
        }
        return match;
      });

      // remove the term in the text
      content = content.replace(termInText, () => {
        this.wasAnonymized = true;
        return mask;
      });
    }
    return content;
  }

  /**
   * Apply all the anonymization steps to `content`.
   *
   * @param content the text to anonymize
   * @returns the anonymized text
   */
  anonymize(content: string) {
    const span = trace
      .getTracer("ano-file")
      .startSpan("ContentAnonimizer.anonymize");
    try {
      content = this.removeImage(content);
      span.addEvent("removeImage");
      content = this.removeLink(content);
      span.addEvent("removeLink");
      content = this.replaceGitHubSelfLinks(content);
      span.addEvent("replaceGitHubSelfLinks");
      content = this.replaceTerms(content);
      span.addEvent("replaceTerms");
      return content;
    } finally {
      span.end();
    }
  }
}
|
||||
|
||||
/**
 * Anonymize a path by replacing every occurrence of each term (case
 * insensitive) by `<mask>-<position of the term>`.
 *
 * A term is used as a regular expression when it is a valid one; otherwise
 * its special characters are escaped first. Blank terms are ignored.
 *
 * @param path the path to anonymize
 * @param terms the terms to replace
 * @returns the anonymized path
 */
export function anonymizePath(path: string, terms: string[]) {
  return trace
    .getTracer("ano-file")
    .startActiveSpan("utils.anonymizePath", (span) => {
      span.setAttribute("path", path);
      let anonymized = path;
      for (const [index, rawTerm] of terms.entries()) {
        if (rawTerm.trim() == "") {
          continue;
        }
        let pattern = rawTerm;
        try {
          new RegExp(pattern, "gi");
        } catch {
          // escape regex characters
          pattern = pattern.replace(/[-[\]{}()*+?.,\\^$|#]/g, "\\$&");
        }
        anonymized = anonymized.replace(
          new RegExp(pattern, "gi"),
          config.ANONYMIZATION_MASK + "-" + (index + 1)
        );
      }
      span.setAttribute("return", anonymized);
      span.end();
      return anonymized;
    });
}
|
||||
@@ -0,0 +1,14 @@
|
||||
import { model } from "mongoose";
|
||||
|
||||
import AnonymizedPullRequestSchema from "./anonymizedPullRequests.schema";
|
||||
import {
|
||||
IAnonymizedPullRequestDocument,
|
||||
IAnonymizedPullRequestModel,
|
||||
} from "./anonymizedPullRequests.types";
|
||||
|
||||
/**
 * Mongoose model registered under the name "AnonymizedPullRequest" and
 * backed by `AnonymizedPullRequestSchema`.
 */
const AnonymizedPullRequestModel = model<IAnonymizedPullRequestDocument>(
  "AnonymizedPullRequest",
  AnonymizedPullRequestSchema
) as IAnonymizedPullRequestModel;
|
||||
|
||||
export default AnonymizedPullRequestModel;
|
||||
@@ -0,0 +1,66 @@
|
||||
import { Schema } from "mongoose";
|
||||
|
||||
/**
 * Mongoose schema of an anonymized pull request: status information, view
 * statistics, owner, source, anonymization options and the stored content
 * of the pull request.
 */
const AnonymizedPullRequestSchema = new Schema({
  pullRequestId: {
    type: String,
    index: { unique: true },
  },
  status: {
    type: String,
    default: "preparing",
  },
  statusDate: Date,
  statusMessage: String,
  anonymizeDate: Date,
  lastView: Date,
  pageView: Number,
  owner: Schema.Types.ObjectId,
  conference: String,
  source: {
    pullRequestId: Number,
    repositoryFullName: String,
    accessToken: String,
  },
  options: {
    terms: [String],
    expirationMode: { type: String },
    expirationDate: Date,
    update: Boolean,
    image: Boolean,
    link: Boolean,
    title: Boolean,
    body: Boolean,
    comments: Boolean,
    diff: Boolean,
    origin: Boolean,
    username: Boolean,
    date: Boolean,
  },
  dateOfEntry: {
    type: Date,
    // A function default is evaluated for every new document; a
    // `new Date()` value would be computed once, when the schema is defined.
    default: Date.now,
  },
  pullRequest: {
    diff: String,
    title: String,
    body: String,
    creationDate: Date,
    updatedDate: Date,
    draft: Boolean,
    merged: Boolean,
    mergedDate: Date,
    state: String,
    baseRepositoryFullName: String,
    headRepositoryFullName: String,
    comments: [
      {
        body: String,
        creationDate: Date,
        updatedDate: Date,
        author: String,
      },
    ],
  },
});
|
||||
|
||||
export default AnonymizedPullRequestSchema;
|
||||
@@ -0,0 +1,61 @@
|
||||
import { Document, Model } from "mongoose";
|
||||
import { RepositoryStatus } from "../../types";
|
||||
|
||||
/**
 * Shape of an anonymized pull request as stored in the database.
 */
export interface IAnonymizedPullRequest {
  /** Identifier of the anonymized pull request */
  pullRequestId: string;
  status?: RepositoryStatus;
  statusMessage?: string;
  statusDate: Date;
  anonymizeDate: Date;
  /** Where the original pull request comes from */
  source: {
    pullRequestId: number;
    repositoryFullName: string;
    accessToken?: string;
  };
  owner: string;
  conference: string;
  /** Anonymization options */
  options: {
    terms: string[];
    expirationMode: "never" | "redirect" | "remove";
    expirationDate?: Date;
    update: boolean;
    image: boolean;
    link: boolean;
    title: boolean;
    body: boolean;
    comments: boolean;
    diff: boolean;
    origin: boolean;
    username: boolean;
    date: boolean;
  };
  pageView: number;
  lastView: Date;
  /** Stored content of the pull request */
  pullRequest: {
    diff: string;
    title: string;
    body: string;
    creationDate: Date;
    updatedDate: Date;
    draft?: boolean;
    merged?: boolean;
    mergedDate?: Date;
    state?: string;
    baseRepositoryFullName?: string;
    headRepositoryFullName?: string;
    comments?: {
      body: string;
      creationDate: Date;
      updatedDate: Date;
      author: string;
    }[];
  };
}
|
||||
|
||||
/**
 * Mongoose document of an anonymized pull request.
 */
export interface IAnonymizedPullRequestDocument
  extends IAnonymizedPullRequest, Document {
  setLastUpdated: (this: IAnonymizedPullRequestDocument) => Promise<void>;
}
|
||||
/**
 * Mongoose model type for anonymized pull request documents.
 */
export interface IAnonymizedPullRequestModel
  extends Model<IAnonymizedPullRequestDocument> {}
|
||||
@@ -0,0 +1,14 @@
|
||||
import { model } from "mongoose";
|
||||
|
||||
import {
|
||||
IAnonymizedRepositoryDocument,
|
||||
IAnonymizedRepositoryModel,
|
||||
} from "./anonymizedRepositories.types";
|
||||
import AnonymizedRepositorySchema from "./anonymizedRepositories.schema";
|
||||
|
||||
/**
 * Mongoose model registered under the name "AnonymizedRepository" and
 * backed by `AnonymizedRepositorySchema`.
 */
const AnonymizedRepositoryModel = model<IAnonymizedRepositoryDocument>(
  "AnonymizedRepository",
  AnonymizedRepositorySchema
) as IAnonymizedRepositoryModel;
|
||||
|
||||
export default AnonymizedRepositoryModel;
|
||||
@@ -0,0 +1,73 @@
|
||||
import { Schema } from "mongoose";
|
||||
|
||||
/**
 * Mongoose schema describing one anonymized repository.
 */
const AnonymizedRepositorySchema = new Schema({
  // Public identifier of the anonymized repository. The unique index uses a
  // collation with strength 2 (per MongoDB collation rules this ignores case).
  repoId: {
    type: String,
    index: { unique: true, collation: { locale: "en", strength: 2 } },
  },
  status: {
    type: String,
    default: "preparing",
  },
  statusDate: Date,
  statusMessage: String,
  anonymizeDate: Date,
  lastView: Date,
  pageView: Number,
  accessToken: String,
  // Reference to the owning document of the "user" model.
  owner: {
    type: Schema.Types.ObjectId,
    ref: "user",
    index: true,
  },
  conference: String,
  source: {
    // `type` is a reserved key in Mongoose schemas, hence the nested form.
    type: { type: String },
    branch: String,
    commit: String,
    commitDate: Date,
    repositoryId: String,
    repositoryName: String,
    accessToken: String,
  },
  // NOTE(review): presumably means "truncated"; the spelling is the persisted
  // field name and must not be changed without a data migration.
  truckedFileList: {
    type: Boolean,
    default: false,
  },
  originalFiles: Schema.Types.Mixed,
  options: {
    terms: [String],
    expirationMode: { type: String },
    expirationDate: Date,
    update: Boolean,
    image: Boolean,
    pdf: Boolean,
    notebook: Boolean,
    link: Boolean,
    page: Boolean,
    pageSource: {
      branch: String,
      path: String,
    },
  },
  dateOfEntry: {
    type: Date,
    // Pass the function, not `new Date()`: a Date instance would be created
    // once when this module is loaded and shared by every new document.
    default: Date.now,
  },
  size: {
    storage: {
      type: Number,
      default: 0,
    },
    file: {
      type: Number,
      default: 0,
    },
  },
  isReseted: {
    type: Boolean,
    default: false,
  },
});

export default AnonymizedRepositorySchema;
|
||||
@@ -0,0 +1,53 @@
|
||||
import { Document, Model } from "mongoose";
|
||||
import { RepositoryStatus, Tree } from "../../types";
|
||||
|
||||
/**
 * Plain data shape of an anonymized repository, mirroring the fields of the
 * corresponding Mongoose schema.
 */
export interface IAnonymizedRepository {
  repoId: string;
  status?: RepositoryStatus;
  statusMessage?: string;
  statusDate: Date;
  anonymizeDate: Date;
  /** Where the original content comes from and which revision is used. */
  source: {
    type: "GitHubDownload" | "GitHubStream" | "Zip";
    branch?: string;
    commit?: string;
    commitDate?: Date;
    repositoryId?: string;
    repositoryName?: string;
    accessToken?: string;
  };
  owner: string;
  // NOTE(review): presumably "truncated"; the name matches the schema field.
  truckedFileList: boolean;
  originalFiles?: Tree;
  conference: string;
  /** Anonymization options selected for this repository. */
  options: {
    terms: string[];
    expirationMode: "never" | "redirect" | "remove";
    expirationDate?: Date;
    update: boolean;
    image: boolean;
    pdf: boolean;
    notebook: boolean;
    link: boolean;
    page: boolean;
    pageSource?: {
      branch: string;
      path: string;
    };
  };
  pageView: number;
  lastView: Date;
  size: {
    storage: number;
    file: number;
  };
  isReseted: boolean;
}
|
||||
|
||||
/**
 * Mongoose document shape for an anonymized repository: the plain
 * `IAnonymizedRepository` fields combined with the Mongoose `Document` API.
 */
export interface IAnonymizedRepositoryDocument
  extends IAnonymizedRepository,
    Document {
  /** Instance method declared on the document; implemented outside this file. */
  setLastUpdated: (this: IAnonymizedRepositoryDocument) => Promise<void>;
}
|
||||
/** Mongoose model type whose documents are `IAnonymizedRepositoryDocument`. */
export interface IAnonymizedRepositoryModel
  extends Model<IAnonymizedRepositoryDocument> {}
|
||||
11
src/core/model/conference/conferences.model.ts
Normal file
11
src/core/model/conference/conferences.model.ts
Normal file
@@ -0,0 +1,11 @@
|
||||
import { model } from "mongoose";
|
||||
|
||||
import { IConferenceDocument, IConferenceModel } from "./conferences.types";
|
||||
import ConferenceSchema from "./conferences.schema";
|
||||
|
||||
/**
 * Mongoose model registered under the name "Conference" and backed by
 * `ConferenceSchema`.
 */
const ConferenceModel = model<IConferenceDocument>(
  "Conference",
  ConferenceSchema
) as IConferenceModel;

export default ConferenceModel;
|
||||
58
src/core/model/conference/conferences.schema.ts
Normal file
58
src/core/model/conference/conferences.schema.ts
Normal file
@@ -0,0 +1,58 @@
|
||||
import { Schema } from "mongoose";
|
||||
|
||||
/**
 * Mongoose schema describing a conference, its owners, the repositories
 * attached to it, its default anonymization options, plan and billing data.
 *
 * The constant is the module's default export, so its local name is free to
 * reflect what it actually describes.
 */
const ConferenceSchema = new Schema({
  name: String,
  conferenceID: {
    type: String,
    index: { unique: true },
  },
  url: String,
  startDate: Date,
  endDate: Date,
  status: String,
  owners: { type: [Schema.Types.ObjectId] },
  repositories: {
    type: [
      {
        id: { type: Schema.Types.ObjectId },
        addDate: { type: Date },
        removeDate: { type: Date },
      },
    ],
  },
  options: {
    expirationMode: String,
    expirationDate: Date,
    update: Boolean,
    image: Boolean,
    pdf: Boolean,
    notebook: Boolean,
    link: Boolean,
    page: Boolean,
  },
  dateOfEntry: {
    type: Date,
    // Pass the function, not `new Date()`: a Date instance would be created
    // once when this module is loaded and shared by every new document.
    default: Date.now,
  },
  plan: {
    planID: String,
    pricePerRepository: Number,
    quota: {
      repository: Number,
      size: Number,
      file: Number,
    },
  },
  billing: {
    name: String,
    email: String,
    address: String,
    address2: String,
    city: String,
    zip: String,
    country: String,
    vat: String,
  },
});

export default ConferenceSchema;
|
||||
49
src/core/model/conference/conferences.types.ts
Normal file
49
src/core/model/conference/conferences.types.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
import { Document, Model } from "mongoose";
|
||||
import { ConferenceStatus } from "../../types";
|
||||
|
||||
/**
 * Plain data shape of a conference, mirroring the fields of the conference
 * Mongoose schema.
 */
export interface IConference {
  name: string;
  conferenceID: string;
  startDate: Date;
  endDate: Date;
  url: string;
  status: ConferenceStatus;
  owners: string[];
  /** Repositories attached to the conference, with add/remove timestamps. */
  repositories: {
    id: string;
    addDate: Date;
    removeDate?: Date;
  }[];
  /** Anonymization options stored on the conference. */
  options: {
    expirationMode: "never" | "redirect" | "remove";
    expirationDate?: Date;
    update: boolean;
    image: boolean;
    pdf: boolean;
    notebook: boolean;
    link: boolean;
    page: boolean;
  };
  plan: {
    planID: string;
    pricePerRepository: number;
    quota: {
      repository: number;
      size: number;
      file: number;
    };
  };
  billing?: {
    name: string;
    email: string;
    address: string;
    address2?: string;
    city: string;
    zip: string;
    country: string;
    vat?: string;
  };
}
|
||||
|
||||
/** Mongoose document shape for a conference (`IConference` plus `Document`). */
export interface IConferenceDocument extends IConference, Document {}
|
||||
/** Mongoose model type whose documents are `IConferenceDocument`. */
export interface IConferenceModel extends Model<IConferenceDocument> {}
|
||||
11
src/core/model/repositories/repositories.model.ts
Normal file
11
src/core/model/repositories/repositories.model.ts
Normal file
@@ -0,0 +1,11 @@
|
||||
import { model } from "mongoose";
|
||||
|
||||
import { IRepositoryDocument, IRepositoryModel } from "./repositories.types";
|
||||
import RepositorySchema from "./repositories.schema";
|
||||
|
||||
/**
 * Mongoose model registered under the name "Repository" and backed by
 * `RepositorySchema`.
 */
const RepositoryModel = model<IRepositoryDocument>(
  "Repository",
  RepositorySchema
) as IRepositoryModel;

export default RepositoryModel;
|
||||
41
src/core/model/repositories/repositories.schema.ts
Normal file
41
src/core/model/repositories/repositories.schema.ts
Normal file
@@ -0,0 +1,41 @@
|
||||
import { Schema } from "mongoose";
|
||||
|
||||
/**
 * Mongoose schema describing a source repository and the branch information
 * stored for it.
 */
const RepositorySchema = new Schema({
  externalId: {
    type: String,
    index: { unique: true },
  },
  name: {
    type: String,
    index: true,
  },
  url: String,
  source: {
    type: String,
    default: "github",
  },
  hasPage: { type: Boolean, default: false },
  pageSource: {
    branch: { type: String },
    path: String,
  },
  branches: [
    {
      name: { type: String },
      commit: String,
      readme: String,
    },
  ],
  defaultBranch: String,
  size: Number,
  status: {
    type: String,
    default: "ready",
  },
  dateOfEntry: {
    type: Date,
    // Pass the function, not `new Date()`: a Date instance would be created
    // once when this module is loaded and shared by every new document.
    default: Date.now,
  },
});

export default RepositorySchema;
|
||||
25
src/core/model/repositories/repositories.types.ts
Normal file
25
src/core/model/repositories/repositories.types.ts
Normal file
@@ -0,0 +1,25 @@
|
||||
import { Document, Model } from "mongoose";
|
||||
|
||||
/**
 * Plain data shape of a source repository, mirroring the fields of the
 * repository Mongoose schema.
 */
export interface IRepository {
  externalId: string;
  name: string;
  url?: string;
  source: "github";
  size?: number;
  defaultBranch?: string;
  hasPage: boolean;
  pageSource?: {
    branch: string;
    path: string;
  };
  /** Known branches with their commit and, when stored, the README text. */
  branches?: {
    name: string;
    commit: string;
    readme?: string;
  }[];
}
|
||||
|
||||
/** Mongoose document shape for a repository (`IRepository` plus `Document`). */
export interface IRepositoryDocument extends IRepository, Document {
  /** Instance method declared on the document; implemented outside this file. */
  setLastUpdated: (this: IRepositoryDocument) => Promise<void>;
}
|
||||
/** Mongoose model type whose documents are `IRepositoryDocument`. */
export interface IRepositoryModel extends Model<IRepositoryDocument> {}
|
||||
8
src/core/model/users/users.model.ts
Normal file
8
src/core/model/users/users.model.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
import { model } from "mongoose";
|
||||
|
||||
import { IUserDocument, IUserModel } from "./users.types";
|
||||
import UserSchema from "./users.schema";
|
||||
|
||||
/** Mongoose model registered under the name "user" and backed by `UserSchema`. */
const UserModel = model<IUserDocument>("user", UserSchema) as IUserModel;

export default UserModel;
|
||||
50
src/core/model/users/users.schema.ts
Normal file
50
src/core/model/users/users.schema.ts
Normal file
@@ -0,0 +1,50 @@
|
||||
import { Schema } from "mongoose";
|
||||
|
||||
/**
 * Mongoose schema describing a user account, its GitHub credentials, its
 * repositories and its default anonymization settings.
 */
const UserSchema = new Schema({
  accessTokens: {
    github: { type: String },
  },
  externalIDs: {
    github: { type: String, index: true },
  },
  username: {
    type: String,
    index: { unique: true },
  },
  emails: [
    {
      email: { type: String },
      default: Boolean,
    },
  ],
  isAdmin: { type: Boolean, default: false },
  photo: String,
  // String identifiers referencing documents of the "Repository" model.
  repositories: [
    {
      type: String,
      ref: "Repository",
    },
  ],
  // A field literally named `default` holding the user's default settings;
  // it is a nested object here, not a Mongoose default-value declaration.
  default: {
    terms: [String],
    options: {
      expirationMode: { type: String },
      update: Boolean,
      image: Boolean,
      pdf: Boolean,
      notebook: Boolean,
      link: Boolean,
      page: { type: String },
    },
  },
  status: {
    type: String,
    default: "active",
  },
  dateOfEntry: {
    type: Date,
    // Pass the function, not `new Date()`: a Date instance would be created
    // once when this module is loaded and shared by every new document.
    default: Date.now,
  },
});

export default UserSchema;
|
||||
39
src/core/model/users/users.types.ts
Normal file
39
src/core/model/users/users.types.ts
Normal file
@@ -0,0 +1,39 @@
|
||||
import { Document, Model } from "mongoose";
|
||||
|
||||
/**
 * Plain data shape of a user account, mirroring the fields of the user
 * Mongoose schema.
 */
export interface IUser {
  accessTokens: {
    github: string;
  };
  externalIDs: {
    github: string;
  };
  username: string;
  isAdmin: boolean;
  emails: {
    email: string;
    default: boolean;
  }[];
  photo?: string;

  // NOTE(review): typed as numbers here while the schema declares string
  // references to "Repository"; confirm which one callers rely on.
  repositories?: number[];
  /** Default anonymization settings of the user. */
  default?: {
    terms: string[];
    options: {
      expirationMode: "never" | "redirect" | "";
      update: boolean;
      image: boolean;
      pdf: boolean;
      notebook: boolean;
      link: boolean;
      page: string | null;
    };
  };
  status?: "active" | "removed";
  dateOfEntry?: Date;
  lastUpdated?: Date;
}
|
||||
|
||||
/** Mongoose document shape for a user (`IUser` plus `Document`). */
export interface IUserDocument extends IUser, Document {
  /** Instance method declared on the document; implemented outside this file. */
  setLastUpdated: (this: IUserDocument) => Promise<void>;
}
|
||||
/** Mongoose model type whose documents are `IUserDocument`. */
export interface IUserModel extends Model<IUserDocument> {}
|
||||
27
src/core/source/GitHubBase.ts
Normal file
27
src/core/source/GitHubBase.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
import { Readable } from "stream";
|
||||
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import { Tree } from "../types";
|
||||
import { SourceBase } from "./Source";
|
||||
|
||||
/**
 * Data needed by a GitHub-backed source to locate a repository revision and
 * authenticate against GitHub.
 */
export interface GitHubBaseData {
  /** Supplies the GitHub token, either directly or asynchronously. */
  getToken: () => string | Promise<string>;
  /** Identifier of the anonymized repository (used as the storage key). */
  repoId: string;
  /** Owner part of the GitHub repository. */
  organization: string;
  /** Name part of the GitHub repository. */
  repoName: string;
  /** Commit to read from. */
  commit: string;
}
|
||||
|
||||
/**
 * Common base of the GitHub-backed sources. It only stores the source data;
 * concrete subclasses decide how file contents and the file tree are obtained.
 */
export default abstract class GitHubBase implements SourceBase {
  /** Discriminator identifying the concrete source implementation. */
  abstract type: "GitHubDownload" | "GitHubStream" | "Zip";

  accessToken: string | undefined;

  constructor(readonly data: GitHubBaseData) {}

  /**
   * Provide the content of one file as a readable stream.
   *
   * @param file the anonymized file to read
   * @param progress optional callback receiving status strings
   */
  abstract getFileContent(
    file: AnonymizedFile,
    progress?: (status: string) => void
  ): Promise<Readable>;

  /**
   * Provide the file tree of the repository.
   *
   * @param progress optional callback receiving status strings
   */
  abstract getFiles(progress?: (status: string) => void): Promise<Tree>;
}
|
||||
121
src/core/source/GitHubDownload.ts
Normal file
121
src/core/source/GitHubDownload.ts
Normal file
@@ -0,0 +1,121 @@
|
||||
import got from "got";
|
||||
import { Readable } from "stream";
|
||||
import { OctokitResponse } from "@octokit/types";
|
||||
|
||||
import storage from "../storage";
|
||||
import GitHubBase, { GitHubBaseData } from "./GitHubBase";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import { FILE_TYPE } from "../storage/Storage";
|
||||
import { octokit } from "../GitHubUtils";
|
||||
import AnonymousError from "../AnonymousError";
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
|
||||
/**
 * Source that downloads the whole repository as a zip archive from GitHub,
 * extracts it into the storage and then serves files from the storage.
 */
export default class GitHubDownload extends GitHubBase {
  type: "GitHubDownload" | "GitHubStream" | "Zip" = "GitHubDownload";

  constructor(data: GitHubBaseData) {
    super(data);
  }

  /**
   * Ask GitHub (with a HEAD request) for the zipball of the configured commit,
   * falling back to "HEAD" when no commit is set. The response's `url` is
   * what `download` streams from.
   */
  private async _getZipUrl(): Promise<OctokitResponse<unknown, 302>> {
    const oct = octokit(await this.data.getToken());
    return oct.rest.repos.downloadZipballArchive({
      owner: this.data.organization,
      repo: this.data.repoName,
      ref: this.data.commit || "HEAD",
      method: "HEAD",
    });
  }

  /**
   * Download the repository archive and extract it into the storage under
   * `repoId`.
   *
   * @param progress optional callback, called about every 1.5 s during the
   *   download with the number of transferred bytes as a string ("" until the
   *   first progress event)
   * @throws AnonymousError "repo_not_accessible" (404) when the archive URL
   *   cannot be obtained, "unable_to_download" (500) when the download or the
   *   extraction fails
   */
  async download(progress?: (status: string) => void) {
    const span = trace.getTracer("ano-file").startSpan("GHDownload.download");
    span.setAttribute("repoId", this.data.repoId);
    try {
      let response: OctokitResponse<unknown, number>;
      try {
        response = await this._getZipUrl();
      } catch (error) {
        span.recordException(error as Error);
        throw new AnonymousError("repo_not_accessible", {
          httpStatus: 404,
          object: this.data,
          cause: error as Error,
        });
      }
      await storage.mk(this.data.repoId);
      let downloadProgress: { transferred: number } | undefined = undefined;
      let progressTimeout: ReturnType<typeof setTimeout> | undefined;
      let inDownload = true;

      // Re-arms itself every 1.5 s until the download is flagged as finished.
      function updateProgress() {
        if (inDownload) {
          if (progress) {
            progress(downloadProgress?.transferred?.toString() || "");
          }
          progressTimeout = setTimeout(updateProgress, 1500);
        }
      }
      updateProgress();

      try {
        const downloadStream = got.stream(response.url);
        downloadStream.addListener("downloadProgress", (p) => {
          downloadProgress = p;
        });
        await storage.extractZip(
          this.data.repoId,
          "",
          downloadStream,
          this.type
        );
      } catch (error) {
        span.recordException(error as Error);
        throw new AnonymousError("unable_to_download", {
          httpStatus: 500,
          cause: error as Error,
          object: this.data,
        });
      } finally {
        // Stop the progress loop whether the download succeeded or failed.
        inDownload = false;
        clearTimeout(progressTimeout);
      }
    } finally {
      span.end();
    }
  }

  /**
   * Return the content of a file from the storage, downloading the whole
   * repository first when the file is not stored yet.
   *
   * @param file the anonymized file to read
   * @param progress optional callback forwarded to `download`
   * @throws AnonymousError "folder_not_supported" (400) when the path is a
   *   folder; errors from `file.originalPath()` and `download` propagate
   */
  async getFileContent(
    file: AnonymizedFile,
    progress?: (status: string) => void
  ): Promise<Readable> {
    const span = trace
      .getTracer("ano-file")
      .startSpan("GHDownload.getFileContent");
    span.setAttribute("repoId", file.repository.repoId);
    try {
      // The storage is addressed by (repoId, path), as in the read calls
      // below; passing only the file path would look it up as a repo id.
      const exists = await storage.exists(this.data.repoId, file.filePath);
      if (exists === FILE_TYPE.FILE) {
        return storage.read(this.data.repoId, file.filePath);
      } else if (exists === FILE_TYPE.FOLDER) {
        throw new AnonymousError("folder_not_supported", {
          httpStatus: 400,
          object: file,
        });
      }
      // will throw an error if the file is not in the repository
      await file.originalPath();

      // the cache is not ready, we need to download the repository
      await this.download(progress);
      return storage.read(this.data.repoId, file.filePath);
    } finally {
      span.end();
    }
  }

  /**
   * List the files of the repository from the storage, downloading the
   * repository first when nothing is stored for `repoId`.
   *
   * @param progress optional callback forwarded to `download`
   */
  async getFiles(progress?: (status: string) => void) {
    if ((await storage.exists(this.data.repoId)) === FILE_TYPE.NOT_FOUND) {
      await this.download(progress);
    }
    return storage.listFiles(this.data.repoId);
  }
}
|
||||
299
src/core/source/GitHubRepository.ts
Normal file
299
src/core/source/GitHubRepository.ts
Normal file
@@ -0,0 +1,299 @@
|
||||
import { Branch } from "../types";
|
||||
import * as gh from "parse-github-url";
|
||||
import { RestEndpointMethodTypes } from "@octokit/rest";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
|
||||
import AnonymousError from "../AnonymousError";
|
||||
import { isConnected } from "../../server/database";
|
||||
import { octokit } from "../GitHubUtils";
|
||||
import { IRepositoryDocument } from "../model/repositories/repositories.types";
|
||||
import RepositoryModel from "../model/repositories/repositories.model";
|
||||
|
||||
/**
 * Wrapper around the stored data of a GitHub repository that offers helpers
 * to query GitHub for commits, branches and README content.
 */
export class GitHubRepository {
  private _data: Partial<{
    [P in keyof IRepositoryDocument]: IRepositoryDocument[P];
  }>;

  constructor(
    data: Partial<{ [P in keyof IRepositoryDocument]: IRepositoryDocument[P] }>
  ) {
    this._data = data;
  }

  /** Summary of the repository used when the object is serialized to JSON. */
  toJSON() {
    const { hasPage, pageSource, defaultBranch } = this._data;
    return {
      repo: this.repo,
      owner: this.owner,
      hasPage,
      pageSource,
      fullName: this.fullName,
      defaultBranch,
      size: this.size,
    };
  }

  /** The underlying repository data. */
  get model() {
    return this._data;
  }

  /** Stored repository name, parsed by `owner` and `repo` below. */
  public get fullName(): string | undefined {
    return this._data.name;
  }

  public get id(): string | undefined {
    return this._data.externalId;
  }

  public get size(): number | undefined {
    return this._data.size;
  }

  /**
   * Fetch the information of one commit from GitHub.
   *
   * @param sha reference of the commit
   * @param opt.accessToken GitHub token used for the request
   * @returns the `data` payload of the GitHub response
   */
  async getCommitInfo(
    sha: string,
    opt: {
      accessToken: string;
    }
  ) {
    const span = trace
      .getTracer("ano-file")
      .startSpan("GHRepository.getCommitInfo");
    span.setAttribute("owner", this.owner);
    span.setAttribute("repo", this.repo);
    try {
      const client = octokit(opt.accessToken);
      const response = await client.repos.getCommit({
        owner: this.owner,
        repo: this.repo,
        ref: sha,
      });
      return response.data;
    } finally {
      span.end();
    }
  }

  /**
   * List the branches of the repository.
   *
   * Branches are fetched from GitHub when none are cached on this object or
   * when `opt.force` is true; README text already known for a branch name is
   * carried over. Otherwise, when the database is connected, the branches are
   * reloaded from the stored repository document.
   *
   * @throws AnonymousError "repo_not_found" when the GitHub request fails
   */
  async branches(opt: {
    accessToken: string;
    force?: boolean;
  }): Promise<Branch[]> {
    const span = trace.getTracer("ano-file").startSpan("GHRepository.branches");
    span.setAttribute("owner", this.owner);
    span.setAttribute("repo", this.repo);
    try {
      const cached = this._data.branches;
      const mustFetch =
        !cached || cached.length === 0 || opt?.force === true;

      if (mustFetch) {
        // get the list of repo from github
        const client = octokit(opt.accessToken);
        try {
          const remote = await client.paginate(
            "GET /repos/{owner}/{repo}/branches",
            {
              owner: this.owner,
              repo: this.repo,
              per_page: 100,
            }
          );
          const branches = remote.map((b) => {
            // keep the README already stored for a branch of the same name
            const known = this._data.branches?.find(
              (f: Branch) => f.name === b.name
            );
            return {
              name: b.name,
              commit: b.commit.sha,
              readme: known?.readme,
            } as Branch;
          });
          this._data.branches = branches;
          if (isConnected) {
            await RepositoryModel.updateOne(
              { externalId: this.id },
              { $set: { branches } }
            );
          }
        } catch (error) {
          span.recordException(error as Error);
          throw new AnonymousError("repo_not_found", {
            httpStatus: (error as any).status,
            cause: error as Error,
            object: this,
          });
        }
      } else if (isConnected) {
        const stored = await RepositoryModel.findOne({
          externalId: this.id,
        }).select("branches");
        this._data.branches = stored?.branches;
      }

      return this._data.branches || [];
    } finally {
      span.end();
    }
  }

  /**
   * Return the README of a branch, decoded to a UTF-8 string.
   *
   * The branch defaults to the stored default branch, then to "master". The
   * README is requested from GitHub when the selected branch has none stored
   * or when `opt.force` is true, and the repository document is then saved.
   *
   * @throws AnonymousError "repo_not_found" (404) when the repository has no
   *   stored document, "readme_not_available" (404) when the branch is unknown
   *   or the GitHub request fails
   */
  async readme(opt: {
    branch?: string;
    force?: boolean;
    accessToken: string;
  }): Promise<string | undefined> {
    const span = trace.getTracer("ano-file").startSpan("GHRepository.readme");
    span.setAttribute("owner", this.owner);
    span.setAttribute("repo", this.repo);
    try {
      if (!opt.branch) opt.branch = this._data.defaultBranch || "master";

      const model = await RepositoryModel.findOne({
        externalId: this.id,
      }).select("branches");

      if (!model) {
        throw new AnonymousError("repo_not_found", { httpStatus: 404 });
      }

      this._data.branches = await this.branches(opt);
      model.branches = this._data.branches;

      const selected = model.branches.find((f) => f.name === opt.branch);
      if (selected && (!selected.readme || opt?.force === true)) {
        // get the list of repo from github
        const client = octokit(opt.accessToken);
        try {
          const ghRes = await client.repos.getReadme({
            owner: this.owner,
            repo: this.repo,
            ref: selected?.commit,
          });
          const decoded = Buffer.from(
            ghRes.data.content,
            ghRes.data.encoding as BufferEncoding
          ).toString("utf-8");
          selected.readme = decoded;
          await model.save();
        } catch (error) {
          span.recordException(error as Error);
          throw new AnonymousError("readme_not_available", {
            httpStatus: 404,
            cause: error as Error,
            object: this,
          });
        }
      }

      if (!selected) {
        throw new AnonymousError("readme_not_available", {
          httpStatus: 404,
          object: this,
        });
      }

      return selected.readme;
    } finally {
      span.end();
    }
  }

  /**
   * Owner part of the repository, falling back to the full name when the
   * parser yields none.
   *
   * @throws AnonymousError "invalid_repo" (400) when the name is missing or
   *   cannot be parsed
   */
  public get owner(): string {
    const { fullName, parsed } = this.parseFullName();
    return parsed.owner || fullName;
  }

  /**
   * Name part of the repository, falling back to the full name when the
   * parser yields none.
   *
   * @throws AnonymousError "invalid_repo" (400) when the name is missing or
   *   cannot be parsed
   */
  public get repo(): string {
    const { fullName, parsed } = this.parseFullName();
    return parsed.name || fullName;
  }

  /** Parse `fullName` with parse-github-url, rejecting missing/unparsable names. */
  private parseFullName() {
    const fullName = this.fullName;
    if (!fullName) {
      throw new AnonymousError("invalid_repo", {
        httpStatus: 400,
        object: this,
      });
    }
    const parsed = gh(fullName);
    if (!parsed) {
      throw new AnonymousError("invalid_repo", {
        httpStatus: 400,
        object: this,
      });
    }
    return { fullName, parsed };
  }
}
|
||||
|
||||
/**
 * Fetch a repository from GitHub and wrap it in a `GitHubRepository`.
 *
 * The repository document is looked up by its external id ("gh_" + GitHub id)
 * when the database is connected, refreshed with the values returned by
 * GitHub (name, url, size, default branch, pages information) and saved.
 *
 * @param opt.owner owner of the repository
 * @param opt.repo repository name; a trailing ".git" is removed (in place)
 * @param opt.accessToken GitHub token used for the requests
 * @throws AnonymousError "repo_not_found" when GitHub does not return the
 *   repository
 */
export async function getRepositoryFromGitHub(opt: {
  owner: string;
  repo: string;
  accessToken: string;
}) {
  const span = trace
    .getTracer("ano-file")
    .startSpan("GHRepository.getRepositoryFromGitHub");
  span.setAttribute("owner", opt.owner);
  span.setAttribute("repo", opt.repo);
  try {
    // Strip only a trailing ".git". Removing the first occurrence anywhere
    // would corrupt names such as "user.github.io".
    if (opt.repo.endsWith(".git")) {
      opt.repo = opt.repo.slice(0, -".git".length);
    }
    const oct = octokit(opt.accessToken);
    let r: RestEndpointMethodTypes["repos"]["get"]["response"]["data"];
    try {
      r = (
        await oct.repos.get({
          owner: opt.owner,
          repo: opt.repo,
        })
      ).data;
    } catch (error) {
      span.recordException(error as Error);
      throw new AnonymousError("repo_not_found", {
        httpStatus: (error as any).status,
        object: {
          owner: opt.owner,
          repo: opt.repo,
        },
        cause: error as Error,
      });
    }
    if (!r)
      throw new AnonymousError("repo_not_found", {
        httpStatus: 404,
        object: {
          owner: opt.owner,
          repo: opt.repo,
        },
      });
    // Start from a fresh document and prefer the stored one when it exists.
    let model = new RepositoryModel({ externalId: "gh_" + r.id });
    if (isConnected) {
      const dbModel = await RepositoryModel.findOne({
        externalId: "gh_" + r.id,
      });
      if (dbModel) {
        model = dbModel;
      }
    }
    model.name = r.full_name;
    model.url = r.html_url;
    model.size = r.size;
    model.defaultBranch = r.default_branch;
    model.hasPage = r.has_pages;
    if (model.hasPage) {
      const ghPageRes = await oct.repos.getPages({
        owner: opt.owner,
        repo: opt.repo,
      });
      model.pageSource = ghPageRes.data.source;
    }
    if (isConnected) {
      await model.save();
    }
    return new GitHubRepository(model);
  } finally {
    span.end();
  }
}
|
||||
301
src/core/source/GitHubStream.ts
Normal file
301
src/core/source/GitHubStream.ts
Normal file
@@ -0,0 +1,301 @@
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import GitHubBase, { GitHubBaseData } from "./GitHubBase";
|
||||
import storage from "../storage";
|
||||
import { Tree } from "../types";
|
||||
import * as path from "path";
|
||||
import got from "got";
|
||||
|
||||
import * as stream from "stream";
|
||||
import AnonymousError from "../AnonymousError";
|
||||
import config from "../../config";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import { FILE_TYPE } from "../storage/Storage";
|
||||
import { octokit } from "../GitHubUtils";
|
||||
|
||||
export default class GitHubStream extends GitHubBase {
|
||||
type: "GitHubDownload" | "GitHubStream" | "Zip" = "GitHubStream";
|
||||
|
||||
/** Forward the source data to the base class, which stores it as `data`. */
constructor(data: GitHubBaseData) {
  super(data);
}
|
||||
|
||||
/**
 * Open a download stream for a blob of the repository.
 *
 * The blob URL is built from the octokit endpoint description and requested
 * with got, asking GitHub for the raw content.
 *
 * @param token GitHub token sent in the `authorization` header
 * @param sha sha of the blob to download
 * @returns the got stream of the blob content
 * @throws AnonymousError "repo_not_accessible" (404) when the request cannot
 *   be created; errors of the transfer itself are emitted on the stream
 */
downloadFile(token: string, sha: string) {
  const span = trace.getTracer("ano-file").startSpan("GHStream.downloadFile");
  span.setAttribute("sha", sha);
  const oct = octokit(token);
  try {
    const { url } = oct.rest.git.getBlob.endpoint({
      owner: this.data.organization,
      repo: this.data.repoName,
      file_sha: sha,
    });
    return got.stream(url, {
      headers: {
        "X-GitHub-Api-Version": "2022-11-28",
        accept: "application/vnd.github.raw+json",
        authorization: `token ${token}`,
      },
    });
  } catch (error) {
    console.error(error);
    // Record the failure on the span, as the other methods of this class do.
    span.recordException(error as Error);
    throw new AnonymousError("repo_not_accessible", {
      httpStatus: 404,
      object: this.data,
      cause: error as Error,
    });
  } finally {
    // The span covers only the creation of the request, not the transfer.
    span.end();
  }
}
|
||||
|
||||
/**
 * Return the content of a file, served from the storage when it is already
 * there, otherwise streamed from GitHub and written to the storage on the way.
 *
 * @param file the anonymized file to read
 * @returns a readable stream of the file content
 * @throws AnonymousError "folder_not_supported" (400) when the path is a
 *   folder, "file_not_accessible" (404) when the file has no sha; download
 *   failures are emitted on the returned stream as "file_not_found"
 */
async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {
  const span = trace
    .getTracer("ano-file")
    .startSpan("GHStream.getFileContent");
  span.setAttribute("repoId", file.repository.repoId);
  span.setAttribute("file", file.anonymizedPath);
  try {
    try {
      // Accessing the getter is a probe: it throws when the path is unknown.
      file.filePath;
    } catch (_) {
      // compute the original path if ambiguous
      await file.originalPath();
    }
    const fileInfo = await storage.exists(
      file.repository.repoId,
      file.filePath
    );
    if (fileInfo == FILE_TYPE.FILE) {
      return storage.read(file.repository.repoId, file.filePath);
    } else if (fileInfo == FILE_TYPE.FOLDER) {
      throw new AnonymousError("folder_not_supported", {
        httpStatus: 400,
        object: file,
      });
    }
    span.setAttribute("path", file.filePath);
    const file_sha = await file.sha();
    if (!file_sha) {
      throw new AnonymousError("file_not_accessible", {
        httpStatus: 404,
        object: file,
      });
    }
    const content = this.downloadFile(await this.data.getToken(), file_sha);

    // duplicate the stream to write it to the storage
    const stream1 = content.pipe(new stream.PassThrough());
    const stream2 = content.pipe(new stream.PassThrough());

    content.on("error", (error) => {
      const wrapped = new AnonymousError("file_not_found", {
        // got reports HTTP failures with the status on `response.statusCode`;
        // the first two lookups are kept for other error shapes.
        httpStatus:
          (error as any).status ||
          (error as any).httpStatus ||
          (error as any).response?.statusCode,
        cause: error as Error,
        object: file,
      });
      stream1.emit("error", wrapped);
      stream2.emit("error", wrapped);
    });

    // Writing to the storage is best effort and not awaited. Its failure
    // (for example the error forwarded to stream1 above) must not surface as
    // an unhandled rejection; the caller sees the error through stream2.
    void Promise.resolve(
      storage.write(file.repository.repoId, file.filePath, stream1, this.type)
    ).catch((error) => {
      console.error(error);
    });
    return stream2;
  } finally {
    span.end();
  }
}
|
||||
|
||||
/**
 * Build the file tree of the configured commit from the GitHub API.
 *
 * @returns the tree produced by `getTree` for `data.commit`
 */
async getFiles() {
  const span = trace.getTracer("ano-file").startSpan("GHStream.getFiles");
  span.setAttribute("repoId", this.data.repoId);
  try {
    // Await here so that the span is ended after the tree has been built,
    // not as soon as the promise is created.
    return await this.getTree(this.data.commit);
  } finally {
    span.end();
  }
}
|
||||
|
||||
/**
 * Fetch the tree of `sha` recursively and merge it into `truncatedTree`.
 * When GitHub reports the listing as truncated, the tree is completed with
 * `getTruncatedTree`.
 *
 * @param sha sha of the tree (or commit) to list
 * @param truncatedTree tree to fill in
 * @param parentPath path prefix of the listed entries
 * @param count shared counters of listed files and issued requests
 * @returns the filled tree, or `{ __: {} }` when GitHub answers 409
 * @throws AnonymousError "repo_not_accessible" for any other request failure
 */
private async getTree(
  sha: string,
  truncatedTree: Tree = {},
  parentPath: string = "",
  count = {
    file: 0,
    request: 0,
  }
) {
  const span = trace.getTracer("ano-file").startSpan("GHStream.getTree");
  span.setAttribute("sha", sha);

  // One try/finally guarantees the span is ended on every exit path,
  // including errors raised while the tree is being built.
  try {
    let ghRes: Awaited<ReturnType<typeof this.getGHTree>>;
    try {
      count.request++;
      ghRes = await this.getGHTree(sha, { recursive: true });
    } catch (error) {
      console.error(error);
      span.recordException(error as Error);
      if ((error as any).status == 409) {
        // cannot be empty otherwise it would try to download it again
        return { __: {} };
      }
      const err = new AnonymousError("repo_not_accessible", {
        httpStatus: (error as any).status,
        cause: error as Error,
        object: {
          tree_sha: sha,
        },
      });
      span.recordException(err);
      throw err;
    }
    const tree = this.tree2Tree(ghRes.tree, truncatedTree, parentPath);
    count.file += ghRes.tree.length;
    if (ghRes.truncated) {
      await this.getTruncatedTree(sha, tree, parentPath, count);
    }
    return tree;
  } finally {
    span.end();
  }
}
|
||||
|
||||
/**
 * Request one tree from the GitHub API.
 *
 * @param sha sha of the tree to request
 * @param opt.recursive when true, the `recursive` parameter is sent as "1";
 *   otherwise it is left undefined
 * @returns the `data` payload of the GitHub response
 */
private async getGHTree(sha: string, opt = { recursive: true }) {
  const span = trace.getTracer("ano-file").startSpan("GHStream.getGHTree");
  span.setAttribute("sha", sha);
  try {
    const token = await this.data.getToken();
    const client = octokit(token);
    const { organization: owner, repoName: repo } = this.data;
    const response = await client.git.getTree({
      owner,
      repo,
      tree_sha: sha,
      recursive: opt.recursive ? "1" : undefined,
    });
    return response.data;
  } finally {
    span.end();
  }
}
|
||||
|
||||
/**
 * Complete a tree that GitHub truncated.
 *
 * The folder `sha` is listed non-recursively and merged into `truncatedTree`.
 * While the listing has fewer than 100 entries and fewer than 200 requests
 * were issued, each sub-folder is processed the same way in parallel;
 * otherwise a single recursive listing of `sha` is merged instead. Request
 * failures are recorded on the span and otherwise ignored.
 *
 * @param sha sha of the folder to list
 * @param truncatedTree tree to fill in
 * @param parentPath path of the folder inside the repository
 * @param count shared counters of listed files and issued requests
 * @param depth recursion depth, incremented for each nested call
 */
private async getTruncatedTree(
  sha: string,
  truncatedTree: Tree = {},
  parentPath: string = "",
  count = {
    file: 0,
    request: 0,
  },
  depth = 0
) {
  const span = trace
    .getTracer("ano-file")
    .startSpan("GHStream.getTruncatedTree");
  span.setAttribute("sha", sha);
  span.setAttribute("parentPath", parentPath);
  try {
    count.request++;

    let listing: Awaited<ReturnType<typeof this.getGHTree>>;
    try {
      listing = await this.getGHTree(sha, {
        recursive: false,
      });
      this.tree2Tree(listing.tree, truncatedTree, parentPath);
    } catch (error) {
      span.recordException(error as Error);
      return;
    }

    count.file += listing.tree.length;

    const canDescend = listing.tree.length < 100 && count.request < 200;
    if (!canDescend) {
      // Large folder or request budget reached: one recursive listing.
      try {
        const full = await this.getGHTree(sha, {
          recursive: true,
        });
        this.tree2Tree(full.tree, truncatedTree, parentPath);
        if (full.truncated) {
          // TODO: TRUNCATED
        }
      } catch (error) {
        span.recordException(error as Error);
      }
      return;
    }

    // Descend into every sub-folder in parallel.
    const pending: Promise<any>[] = [];
    for (const entry of listing.tree) {
      if (entry.type != "tree" || !entry.path || !entry.sha) continue;
      pending.push(
        this.getTruncatedTree(
          entry.sha,
          truncatedTree,
          path.join(parentPath, entry.path),
          count,
          depth + 1
        )
      );
    }
    await Promise.all(pending);
  } finally {
    span.end();
  }
}
|
||||
|
||||
/**
 * Merge a flat list of git tree entries into a nested `Tree`.
 *
 * Folders become nested objects, blobs become `{ size, sha }` leaves. Path
 * segments starting with "$" are stored with a leading backslash.
 *
 * @param tree the flat entries (entries without a path are ignored)
 * @param partialTree the tree to complete (mutated in place)
 * @param parentPath the path the entries are relative to
 * @returns `partialTree`
 */
private tree2Tree(
  tree: {
    path?: string;
    mode?: string;
    type?: string;
    sha?: string;
    size?: number;
    url?: string;
  }[],
  partialTree: Tree = {},
  parentPath: string = ""
) {
  const span = trace.getTracer("ano-file").startSpan("GHStream.tree2Tree");
  span.setAttribute("parentPath", parentPath);

  // a key starting with "$" is prefixed with a backslash
  const toKey = (segment: string) =>
    segment[0] == "$" ? "\\" + segment : segment;

  try {
    for (const entry of tree) {
      if (!entry.path) continue;

      const segments = path.join(parentPath, entry.path).split("/");

      // a folder contributes all its segments, any other entry stops before
      // its own name
      const folderDepth =
        entry.type == "tree" ? segments.length : segments.length - 1;

      let node = partialTree;
      for (const segment of segments.slice(0, folderDepth)) {
        const key = toKey(segment);
        if (!node[key]) {
          node[key] = {};
        }
        node = node[key] as Tree;
      }

      if (entry.type != "blob") continue;

      if (Object.keys(node).length > config.MAX_FILE_FOLDER) {
        // TODO: TRUNCATED
        continue;
      }

      // leaf: size and sha as provided by the entry, with fallbacks
      node[toKey(segments[folderDepth])] = {
        size: entry.size || 0,
        sha: entry.sha || "",
      };
    }
    return partialTree;
  } finally {
    span.end();
  }
}
|
||||
}
|
||||
24
src/core/source/Source.ts
Normal file
24
src/core/source/Source.ts
Normal file
@@ -0,0 +1,24 @@
|
||||
import { Readable } from "stream";
|
||||
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import { Tree } from "../types";
|
||||
import GitHubDownload from "./GitHubDownload";
|
||||
import GitHubStream from "./GitHubStream";
|
||||
import Zip from "./Zip";
|
||||
|
||||
/** Union of the concrete source implementations. */
export type Source = GitHubDownload | GitHubStream | Zip;

/** Contract shared by every source of repository files. */
export interface SourceBase {
  /** Identifier of the kind of source */
  readonly type: string;

  /**
   * Retrieve the file content
   * @param file the file of the content to retrieve
   * @returns a readable stream of the content
   */
  getFileContent(file: AnonymizedFile): Promise<Readable>;

  /**
   * Get all the files from a specific source
   * @param progress optional callback that receives status strings
   * @returns the tree of files
   */
  getFiles(progress?: (status: string) => void): Promise<Tree>;
}
|
||||
28
src/core/source/Zip.ts
Normal file
28
src/core/source/Zip.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
import * as stream from "stream";
|
||||
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import storage from "../storage";
|
||||
import { SourceBase } from "./Source";
|
||||
|
||||
/**
 * Source whose files are read from the storage of the repository `repoId`.
 */
export default class Zip implements SourceBase {
  type = "Zip";
  url?: string;

  /**
   * @param data source description; only `data.url` is read
   * @param repoId the id of the repository that owns the files
   */
  constructor(data: any, readonly repoId: string) {
    const { url } = data;
    this.url = url;
  }

  /** List the files stored for `repoId`. */
  async getFiles() {
    const tree = await storage.listFiles(this.repoId);
    return tree;
  }

  /**
   * Open the stored content of `file`.
   * The repository id is taken from the file's own repository.
   */
  async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {
    const { repoId } = file.repository;
    const content = await storage.read(repoId, file.filePath);
    return content;
  }

  /** Serialized form: only the type is exposed. */
  toJSON(): any {
    const { type } = this;
    return { type };
  }
}
|
||||
7
src/core/storage.ts
Normal file
7
src/core/storage.ts
Normal file
@@ -0,0 +1,7 @@
|
||||
import config from "../config";
|
||||
import FileSystem from "./storage/FileSystem";
|
||||
import S3Storage from "./storage/S3";
|
||||
|
||||
/**
 * Build the storage instance shared by the application:
 * S3 when `config.STORAGE` is "s3", the local file system otherwise.
 */
function createStorage() {
  if (config.STORAGE == "s3") {
    return new S3Storage();
  }
  return new FileSystem();
}

export default createStorage();
|
||||
224
src/core/storage/FileSystem.ts
Normal file
224
src/core/storage/FileSystem.ts
Normal file
@@ -0,0 +1,224 @@
|
||||
import { Tree } from "../types";
|
||||
import config from "../../config";
|
||||
import * as fs from "fs";
|
||||
import { Extract } from "unzip-stream";
|
||||
import { join, basename, dirname } from "path";
|
||||
import { Response } from "express";
|
||||
import { Readable, pipeline, Transform } from "stream";
|
||||
import * as archiver from "archiver";
|
||||
import { promisify } from "util";
|
||||
import { lookup } from "mime-types";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import StorageBase, { FILE_TYPE } from "./Storage";
|
||||
|
||||
/**
 * Storage backend that keeps the files on the local file system, under
 * `config.FOLDER/<repoPath(repoId)>`.
 */
export default class FileSystem extends StorageBase {
  type = "FileSystem";

  constructor() {
    super();
  }

  /**
   * Tell whether `p` is a file, a folder or does not exist.
   * Any `stat` failure is reported as NOT_FOUND.
   * @override
   */
  async exists(repoId: string, p: string = ""): Promise<FILE_TYPE> {
    const fullPath = join(config.FOLDER, this.repoPath(repoId), p);
    return trace
      .getTracer("ano-file")
      .startActiveSpan("fs.exists", async (span) => {
        span.setAttribute("path", p);
        span.setAttribute("full-path", fullPath);
        try {
          const stat = await fs.promises.stat(fullPath);
          if (stat.isDirectory()) return FILE_TYPE.FOLDER;
          if (stat.isFile()) return FILE_TYPE.FILE;
        } catch (_) {
          // ignore file not found or not downloaded
        } finally {
          // end the span on every path, including the early returns above
          span.end();
        }
        return FILE_TYPE.NOT_FOUND;
      });
  }

  /**
   * Send the file `p` as the HTTP response (dotfiles allowed).
   * The span is ended when express reports the end of the transfer.
   * @override
   */
  async send(repoId: string, p: string, res: Response) {
    const fullPath = join(config.FOLDER, this.repoPath(repoId), p);
    return trace
      .getTracer("ano-file")
      .startActiveSpan("fs.send", async (span) => {
        span.setAttribute("path", fullPath);
        res.sendFile(fullPath, { dotfiles: "allow" }, (err) => {
          if (err) {
            span.recordException(err);
          }
          span.end();
        });
      });
  }

  /**
   * Open a read stream on the file `p`.
   * @override
   */
  async read(repoId: string, p: string): Promise<Readable> {
    const fullPath = join(config.FOLDER, this.repoPath(repoId), p);
    return fs.createReadStream(fullPath);
  }

  /**
   * Size, modification date and content type of `path`.
   * Rejects when `path` cannot be stat-ed.
   */
  async fileInfo(repoId: string, path: string) {
    const fullPath = join(config.FOLDER, this.repoPath(repoId), path);
    const info = await fs.promises.stat(fullPath);
    return {
      size: info.size,
      lastModified: info.mtime,
      // NOTE(review): lookup() returns false for unknown extensions, the cast
      // hides it from the declared `string` type
      contentType: info.isDirectory()
        ? "application/x-directory"
        : (lookup(fullPath) as string),
    };
  }

  /**
   * Write `data` to the file `p`, creating the parent folder first.
   * Failures are recorded on the span and deliberately not re-thrown.
   * @override
   */
  async write(
    repoId: string,
    p: string,
    data: string | Readable
  ): Promise<void> {
    const span = trace.getTracer("ano-file").startSpan("fs.write");
    const fullPath = join(config.FOLDER, this.repoPath(repoId), p);
    span.setAttribute("path", fullPath);
    try {
      await this.mk(repoId, dirname(p));
      if (data instanceof Readable) {
        data.on("error", () => {
          // remove the partial file; a failing cleanup must not surface as
          // an unhandled promise rejection
          this.rm(repoId, p).catch((rmErr) => {
            console.error(`[ERROR] fs write cleanup ${p}`, rmErr);
          });
        });
      }
      return await fs.promises.writeFile(fullPath, data, "utf-8");
    } catch (err: any) {
      span.recordException(err);
      // throw err;
    } finally {
      span.end();
    }
  }

  /**
   * Remove `dir` recursively; a missing path is not an error (`force`).
   * @override
   */
  async rm(repoId: string, dir: string = ""): Promise<void> {
    const span = trace.getTracer("ano-file").startSpan("fs.rm");
    const fullPath = join(config.FOLDER, this.repoPath(repoId), dir);
    span.setAttribute("path", fullPath);
    try {
      await fs.promises.rm(fullPath, {
        force: true,
        recursive: true,
      });
    } finally {
      span.end();
    }
  }

  /**
   * Create `dir` and its missing parents.
   * @override
   */
  async mk(repoId: string, dir: string = ""): Promise<void> {
    const span = trace.getTracer("ano-file").startSpan("fs.mk");
    span.setAttribute("path", dir);
    const fullPath = join(config.FOLDER, this.repoPath(repoId), dir);
    try {
      await fs.promises.mkdir(fullPath, {
        recursive: true,
      });
    } catch (err: any) {
      if (err.code !== "EEXIST") {
        span.recordException(err);
        throw err;
      }
    } finally {
      span.end();
    }
  }

  /**
   * Build the tree of the files stored below `dir`.
   *
   * Names starting with "$" are stored with a leading backslash. An entry
   * that cannot be processed is recorded on the span and skipped.
   *
   * @param repoId the repository id
   * @param dir the folder to list, relative to the repository
   * @param opt `onEntry` is called (and awaited) for every file, with its
   *   path relative to the repository and its size
   * @override
   */
  async listFiles(
    repoId: string,
    dir: string = "",
    opt: {
      onEntry?: (file: { path: string; size: number }) => void | Promise<void>;
    } = {}
  ): Promise<Tree> {
    return trace
      .getTracer("ano-file")
      .startActiveSpan("fs.listFiles", async (span) => {
        span.setAttribute("path", dir);
        try {
          const fullPath = join(config.FOLDER, this.repoPath(repoId), dir);
          const files = await fs.promises.readdir(fullPath);
          const output: Tree = {};
          for (let file of files) {
            // path relative to the repository, used for recursion and onEntry
            const filePath = join(dir, file);
            try {
              // fullPath already contains dir: only the entry name is appended
              const stats = await fs.promises.stat(join(fullPath, file));
              if (file[0] == "$") {
                file = "\\" + file;
              }
              if (stats.isDirectory()) {
                output[file] = await this.listFiles(repoId, filePath, opt);
              } else if (stats.isFile()) {
                if (opt.onEntry) {
                  // awaited so that asynchronous callbacks are finished
                  // before the listing resolves
                  await opt.onEntry({
                    path: filePath,
                    size: stats.size,
                  });
                }
                output[file] = { size: stats.size, sha: stats.ino.toString() };
              }
            } catch (error) {
              span.recordException(error as Error);
            }
          }
          return output;
        } finally {
          span.end();
        }
      });
  }

  /**
   * Extract the zip stream `data` into the folder `p`.
   * The first path segment of every entry name is removed; an entry whose
   * remaining name is empty is redirected to "/dev/null".
   * @override
   */
  async extractZip(repoId: string, p: string, data: Readable): Promise<void> {
    const pipe = promisify(pipeline);
    const fullPath = join(config.FOLDER, this.repoPath(repoId), p);
    return pipe(
      data,
      Extract({
        path: fullPath,
        decodeString: (buf) => {
          const name = buf.toString();
          const newName = name.substring(name.indexOf("/") + 1);
          if (newName == "") return "/dev/null";
          return newName;
        },
      })
    );
  }

  /**
   * Create an archive with the files stored below `dir`.
   *
   * @param opt.format archive format ("zip" by default)
   * @param opt.fileTransformer optional transformation applied to the content
   *   of each file, created from the path of the file
   * @returns the archive, finalized once every file has been appended
   * @override
   */
  async archive(
    repoId: string,
    dir: string,
    opt?: {
      format?: "zip" | "tar";
      fileTransformer?: (path: string) => Transform;
    }
  ) {
    const archive = archiver(opt?.format || "zip", {});
    const fullPath = join(config.FOLDER, this.repoPath(repoId), dir);

    await this.listFiles(repoId, dir, {
      onEntry: async (file) => {
        let rs = await this.read(repoId, file.path);
        if (opt?.fileTransformer) {
          // apply transformation on the stream
          rs = rs.pipe(opt.fileTransformer(file.path));
        }
        const f = file.path.replace(fullPath, "");
        archive.append(rs, {
          name: basename(f),
          prefix: dirname(f),
        });
      },
    });
    // not awaited: the caller consumes the archive stream
    void archive.finalize();
    return archive;
  }
}
|
||||
391
src/core/storage/S3.ts
Normal file
391
src/core/storage/S3.ts
Normal file
@@ -0,0 +1,391 @@
|
||||
import {
|
||||
GetObjectCommand,
|
||||
ListObjectsV2CommandOutput,
|
||||
PutObjectCommandInput,
|
||||
S3,
|
||||
} from "@aws-sdk/client-s3";
|
||||
import { Upload } from "@aws-sdk/lib-storage";
|
||||
import { NodeHttpHandler } from "@smithy/node-http-handler";
|
||||
import config from "../../config";
|
||||
import { pipeline, Readable, Transform } from "stream";
|
||||
import ArchiveStreamToS3 from "decompress-stream-to-s3";
|
||||
import { Response } from "express";
|
||||
import { contentType } from "mime-types";
|
||||
import * as archiver from "archiver";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import { dirname, basename, join } from "path";
|
||||
import { Tree, TreeFile } from "../types";
|
||||
import AnonymousError from "../AnonymousError";
|
||||
import StorageBase, { FILE_TYPE } from "./Storage";
|
||||
|
||||
/**
 * Storage backend that keeps the files in the S3 bucket `config.S3_BUCKET`,
 * under the key prefix returned by `repoPath(repoId)`.
 */
export default class S3Storage extends StorageBase {
  type = "AWS";

  /** @throws AnonymousError (500) when `config.S3_BUCKET` is not set */
  constructor() {
    super();
    if (!config.S3_BUCKET)
      throw new AnonymousError("s3_config_not_provided", {
        httpStatus: 500,
      });
  }

  /**
   * Create an S3 client from the configuration.
   * @param timeout request and connection timeout in milliseconds
   * @throws Error when one of the S3 settings is missing
   */
  private client(timeout = 10000) {
    if (!config.S3_CLIENT_ID) throw new Error("S3_CLIENT_ID not set");
    if (!config.S3_CLIENT_SECRET) throw new Error("S3_CLIENT_SECRET not set");
    if (!config.S3_REGION) throw new Error("S3_REGION not set");
    if (!config.S3_ENDPOINT) throw new Error("S3_ENDPOINT not set");
    return new S3({
      credentials: {
        accessKeyId: config.S3_CLIENT_ID,
        secretAccessKey: config.S3_CLIENT_SECRET,
      },
      region: config.S3_REGION,
      endpoint: config.S3_ENDPOINT,
      requestHandler: new NodeHttpHandler({
        requestTimeout: timeout,
        connectionTimeout: timeout,
      }),
    });
  }

  /**
   * Tell whether `path` is a file (its metadata can be read), a folder (at
   * least one key starts with it) or does not exist.
   * @override
   */
  async exists(repoId: string, path: string = ""): Promise<FILE_TYPE> {
    const span = trace.getTracer("ano-file").startSpan("s3.exists");
    span.setAttribute("path", path);
    try {
      if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
      try {
        // if we can get the file info, it is a file
        await this.fileInfo(repoId, path);
        return FILE_TYPE.FILE;
      } catch (err) {
        // check if it is a directory
        const data = await this.client().listObjectsV2({
          Bucket: config.S3_BUCKET,
          Prefix: join(this.repoPath(repoId), path),
          MaxKeys: 1,
        });
        return (data.Contents?.length || 0) > 0
          ? FILE_TYPE.FOLDER
          : FILE_TYPE.NOT_FOUND;
      }
    } finally {
      span.end();
    }
  }

  /** @override */
  async mk(repoId: string, dir: string = ""): Promise<void> {
    // no need to create folder on S3
  }

  /**
   * Delete every object whose key starts with `dir`, 100 keys per request,
   * calling itself again while the listing is truncated.
   * @override
   */
  async rm(repoId: string, dir: string = ""): Promise<void> {
    const span = trace.getTracer("ano-file").startSpan("s3.rm");
    span.setAttribute("repoId", repoId);
    span.setAttribute("path", dir);
    try {
      if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
      const data = await this.client(200000).listObjectsV2({
        Bucket: config.S3_BUCKET,
        Prefix: join(this.repoPath(repoId), dir),
        MaxKeys: 100,
      });

      const params = {
        Bucket: config.S3_BUCKET,
        Delete: { Objects: new Array<{ Key: string }>() },
      };

      data.Contents?.forEach(function (content) {
        if (content.Key) {
          params.Delete.Objects.push({ Key: content.Key });
        }
      });

      if (params.Delete.Objects.length == 0) {
        // nothing to remove
        return;
      }
      await this.client(200000).deleteObjects(params);

      if (data.IsTruncated) {
        await this.rm(repoId, dir);
      }
    } finally {
      span.end();
    }
  }

  /**
   * Stream the object `path` as the HTTP response.
   * When the object cannot be fetched the response is ended with status 500.
   * @override
   */
  async send(repoId: string, path: string, res: Response) {
    const span = trace.getTracer("ano-file").startSpan("s3.send");
    span.setAttribute("repoId", repoId);
    span.setAttribute("path", path);
    try {
      if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
      try {
        const command = new GetObjectCommand({
          Bucket: config.S3_BUCKET,
          Key: join(this.repoPath(repoId), path),
        });
        const s = await this.client().send(command);
        res.status(200);
        if (s.ContentType) {
          res.contentType(s.ContentType);
        }
        if (s.ContentLength) {
          res.set("Content-Length", s.ContentLength.toString());
        }
        if (s.Body) {
          (s.Body as Readable)?.pipe(res);
        } else {
          res.end();
        }
      } catch (error) {
        span.recordException(error as Error);
        try {
          // end the response, otherwise the client waits until it times out
          res.status(500).end();
        } catch (err) {
          console.error(`[ERROR] S3 send ${path}`, err);
        }
      }
    } finally {
      span.end();
    }
  }

  /**
   * Size, modification date and content type of the object `path`.
   * The content type stored with the object wins; otherwise it is derived
   * from the file name.
   * Rejects when the object metadata cannot be read.
   */
  async fileInfo(repoId: string, path: string) {
    const span = trace.getTracer("ano-file").startSpan("s3.fileInfo");
    span.setAttribute("repoId", repoId);
    span.setAttribute("path", path);
    try {
      if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
      const info = await this.client(3000).headObject({
        Bucket: config.S3_BUCKET,
        Key: join(this.repoPath(repoId), path),
      });
      return {
        size: info.ContentLength,
        lastModified: info.LastModified,
        // basename: mime-types treats a string containing "/" as a content
        // type and returns it unchanged.
        // NOTE(review): contentType() returns false for unknown extensions,
        // the cast hides it from the declared `string` type
        contentType: info.ContentType
          ? info.ContentType
          : (contentType(basename(path)) as string),
      };
    } finally {
      span.end();
    }
  }

  /**
   * Open a read stream on the object `path`.
   * @param path the path of the file, relative to the repository
   * @throws AnonymousError (404) when the response has no body
   * @override
   */
  async read(repoId: string, path: string): Promise<Readable> {
    const span = trace.getTracer("ano-file").startSpan("s3.read");
    span.setAttribute("repoId", repoId);
    span.setAttribute("path", path);
    try {
      if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
      const command = new GetObjectCommand({
        Bucket: config.S3_BUCKET,
        Key: join(this.repoPath(repoId), path),
      });
      const res = (await this.client(3000).send(command)).Body;
      if (!res) {
        throw new AnonymousError("file_not_found", {
          httpStatus: 404,
          object: join(this.repoPath(repoId), path),
        });
      }
      return res as Readable;
    } finally {
      span.end();
    }
  }

  /**
   * Upload `data` as the object `path`.
   * @param source optional value stored as the `source` tag of the object
   * @override
   */
  async write(
    repoId: string,
    path: string,
    data: string | Readable,
    source?: string
  ): Promise<void> {
    const span = trace.getTracer("ano-file").startSpan("s3.write");
    span.setAttribute("repoId", repoId);
    span.setAttribute("path", path);
    try {
      if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");

      if (data instanceof Readable) {
        data.on("error", (err) => {
          console.error(`[ERROR] S3 write ${path}`, err);
          span.recordException(err as Error);
          // remove the partial object; a failing cleanup must not surface as
          // an unhandled promise rejection
          this.rm(repoId, path).catch((rmErr) => {
            console.error(`[ERROR] S3 write cleanup ${path}`, rmErr);
          });
        });
      }

      // derived from the file name only: mime-types treats a string that
      // contains "/" as a content type, and returns false when it finds none
      const mimeType = contentType(basename(path));

      const params: PutObjectCommandInput = {
        Bucket: config.S3_BUCKET,
        Key: join(this.repoPath(repoId), path),
        Body: data,
        ContentType: mimeType || "application/octet-stream",
      };
      if (source) {
        params.Tagging = `source=${source}`;
      }

      const parallelUploads3 = new Upload({
        // 30s timeout
        client: this.client(30000),
        params,
      });

      await parallelUploads3.done();
    } finally {
      span.end();
    }
  }

  /**
   * Build the tree of the objects stored below `dir`, 250 keys per request.
   * Only objects with an ETag become files (`sha` is the ETag).
   * NOTE(review): unlike the file system listing, names starting with "$"
   * are not prefixed with a backslash here — confirm whether that is intended.
   * @override
   */
  async listFiles(repoId: string, dir: string = ""): Promise<Tree> {
    const span = trace.getTracer("ano-file").startSpan("s3.listFiles");
    span.setAttribute("path", dir);
    try {
      if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
      if (dir && dir[dir.length - 1] != "/") dir = dir + "/";
      const out: Tree = {};
      let req: ListObjectsV2CommandOutput;
      let nextContinuationToken: string | undefined;
      do {
        req = await this.client(30000).listObjectsV2({
          Bucket: config.S3_BUCKET,
          Prefix: join(this.repoPath(repoId), dir),
          MaxKeys: 250,
          ContinuationToken: nextContinuationToken,
        });
        if (!req.Contents) return out;
        nextContinuationToken = req.NextContinuationToken;

        for (const f of req.Contents) {
          if (!f.Key) continue;
          // key relative to the listed folder
          f.Key = f.Key.replace(join(this.repoPath(repoId), dir), "");
          const paths = f.Key.split("/");
          let current: Tree = out;
          for (let i = 0; i < paths.length - 1; i++) {
            let p = paths[i];
            if (!p) continue;
            if (!(current[p] as Tree)) {
              current[p] = {} as Tree;
            }
            current = current[p] as Tree;
          }

          if (f.ETag) {
            const fileInfo: TreeFile = { size: f.Size || 0, sha: f.ETag };
            const fileName = paths[paths.length - 1];
            if (fileName) current[fileName] = fileInfo;
          }
        }
      } while (req && req.Contents && req.IsTruncated);
      return out;
    } finally {
      span.end();
    }
  }

  /**
   * Extract the zip stream `data` into the key prefix `path`.
   * The first path segment of every entry name is removed.
   * @param source optional value stored as tag and metadata of each object
   * @override
   */
  async extractZip(
    repoId: string,
    path: string,
    data: Readable,
    source?: string
  ): Promise<void> {
    const span = trace.getTracer("ano-file").startSpan("s3.extractZip");
    span.setAttribute("path", path);
    return new Promise((resolve, reject) => {
      if (!config.S3_BUCKET) {
        span.end();
        return reject(new Error("S3_BUCKET not set"));
      }
      const toS3: ArchiveStreamToS3 = new ArchiveStreamToS3({
        bucket: config.S3_BUCKET,
        prefix: join(this.repoPath(repoId), path),
        s3: this.client(2 * 60 * 60 * 1000), // 2h timeout
        type: "zip",
        onEntry: (header) => {
          header.name = header.name.substring(header.name.indexOf("/") + 1);
          if (source) {
            header.Tagging = `source=${source}`;
            header.Metadata = {
              source: source,
            };
          }
        },
        maxParallel: 10,
      });
      pipeline(data, toS3, (err) => {
        if (err) {
          span.recordException(err as Error);
          span.end();
          return reject(err);
        }
        span.end();
        resolve();
      })
        .on("finish", () => {
          span.end();
          resolve();
        })
        .on("error", reject);
    });
  }

  /**
   * Create an archive with the objects stored below `dir`.
   *
   * @param opt.format archive format ("zip" by default)
   * @param opt.fileTransformer optional transformation applied to the content
   *   of each file, created from the path of the file relative to the
   *   repository (same convention as the file system storage)
   * @returns the archive, finalized once every object has been appended
   * @override
   */
  async archive(
    repoId: string,
    dir: string = "",
    opt?: {
      format?: "zip" | "tar";
      fileTransformer?: (p: string) => Transform;
    }
  ) {
    const span = trace.getTracer("ano-file").startSpan("s3.archive");
    span.setAttribute("repoId", repoId);
    span.setAttribute("path", dir);
    try {
      if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
      const archive = archiver(opt?.format || "zip", {});
      if (dir && dir[dir.length - 1] != "/") dir = dir + "/";

      const repoPrefix = this.repoPath(repoId);
      const listPrefix = join(repoPrefix, dir);

      let req: ListObjectsV2CommandOutput;
      let nextContinuationToken: string | undefined;
      do {
        req = await this.client(30000).listObjectsV2({
          Bucket: config.S3_BUCKET,
          Prefix: listPrefix,
          MaxKeys: 250,
          ContinuationToken: nextContinuationToken,
        });

        nextContinuationToken = req.NextContinuationToken;
        for (const f of req.Contents || []) {
          if (!f.Key) continue;
          const filename = basename(f.Key);
          const prefix = dirname(f.Key.replace(listPrefix, ""));

          // read() prepends the repository prefix itself: give it the path
          // relative to the repository, not the full object key
          const relativePath = f.Key.startsWith(repoPrefix)
            ? f.Key.substring(repoPrefix.length)
            : f.Key;

          let rs = await this.read(repoId, relativePath);
          if (opt?.fileTransformer) {
            // apply transformation on the stream
            rs = rs.pipe(opt.fileTransformer(relativePath));
          }

          archive.append(rs, {
            name: filename,
            prefix,
          });
        }
      } while (req && req.Contents?.length && req.IsTruncated);
      archive.finalize();
      return archive;
    } finally {
      span.end();
    }
  }
}
|
||||
118
src/core/storage/Storage.ts
Normal file
118
src/core/storage/Storage.ts
Normal file
@@ -0,0 +1,118 @@
|
||||
import { join } from "path";
|
||||
import { Transform, Readable } from "stream";
|
||||
import * as archiver from "archiver";
|
||||
import { Response } from "express";
|
||||
|
||||
import { Tree } from "../types";
|
||||
import S3Storage from "./S3";
|
||||
import FileSystem from "./FileSystem";
|
||||
|
||||
/** Union of the concrete storage implementations. */
export type Storage = S3Storage | FileSystem;
|
||||
|
||||
/** Result of `StorageBase.exists`: what was found at the requested path. */
export enum FILE_TYPE {
  FILE = "file",
  FOLDER = "folder",
  NOT_FOUND = "not_found",
}
|
||||
|
||||
/**
 * Contract implemented by every storage backend.
 * Paths are relative to the repository; `repoPath` gives the prefix under
 * which the files of a repository are stored.
 */
export default abstract class StorageBase {
  /**
   * The type of storage
   */
  abstract type: string;

  /**
   * Check if the path exists
   * @param repoId the id of the repository
   * @param path the path to check
   * @returns whether the path is a file, a folder or was not found
   */
  abstract exists(repoId: string, path: string): Promise<FILE_TYPE>;

  /**
   * Send the content of a file as an HTTP response
   * @param repoId the id of the repository
   * @param path the path to the file
   * @param res the express response to write to
   */
  abstract send(repoId: string, path: string, res: Response): Promise<void>;

  /**
   * Read the content of a file
   * @param repoId the id of the repository
   * @param path the path to the file
   */
  abstract read(repoId: string, path: string): Promise<Readable>;

  /**
   * Get the metadata of a file
   * @param repoId the id of the repository
   * @param path the path to the file
   */
  abstract fileInfo(
    repoId: string,
    path: string
  ): Promise<{
    size: number | undefined;
    lastModified: Date | undefined;
    contentType: string;
  }>;

  /**
   * Write data to a file
   * @param repoId the id of the repository
   * @param path the path to the file
   * @param data the content of the file
   * @param source the source of the file
   */
  abstract write(
    repoId: string,
    path: string,
    data: string | Readable,
    source?: string
  ): Promise<void>;

  /**
   * List the files from dir
   * @param repoId the id of the repository
   * @param dir the folder to list
   */
  abstract listFiles(repoId: string, dir: string): Promise<Tree>;

  /**
   * Extract the content of an archive stream to dir
   * @param repoId the id of the repository
   * @param dir the destination folder
   * @param tar the archive stream to extract
   * @param source the source of the file
   */
  abstract extractZip(
    repoId: string,
    dir: string,
    tar: Readable,
    source?: string
  ): Promise<void>;

  /**
   * Remove the path
   * @param repoId the id of the repository
   * @param dir the path to remove
   */
  abstract rm(repoId: string, dir: string): Promise<void>;

  /**
   * Archive the content of dir
   * @param repoId the id of the repository
   * @param dir the folder to archive
   * @param opt the archive options
   */
  abstract archive(
    repoId: string,
    dir: string,
    opt?: {
      /**
       * Archive format
       */
      format?: "zip" | "tar";
      /**
       * Transformer to apply on the content of the file
       */
      fileTransformer?: (p: string) => Transform;
    }
  ): Promise<archiver.Archiver>;

  /**
   * Create a directory
   * @param repoId the id of the repository
   * @param dir the directory to create
   */
  abstract mk(repoId: string, dir: string): Promise<void>;

  /**
   * Prefix under which the files of a repository are stored:
   * `<repoId>/original` followed by a trailing separator ("\\" on win32,
   * "/" elsewhere).
   * @param repoId the id of the repository
   */
  repoPath(repoId: string) {
    return (
      join(repoId, "original") + (process.platform === "win32" ? "\\" : "/")
    );
  }
}
|
||||
32
src/core/types.ts
Normal file
32
src/core/types.ts
Normal file
@@ -0,0 +1,32 @@
|
||||
/** Description of a repository branch. */
export interface Branch {
  /** Name of the branch */
  name: string;
  /** Commit of the branch — presumably the commit sha, confirm with callers */
  commit: string;
  /** Optional readme associated with the branch */
  readme?: string;
}
|
||||
|
||||
/** Possible states of a repository, as string values. */
export enum RepositoryStatus {
  QUEUE = "queue",
  PREPARING = "preparing",
  DOWNLOAD = "download",
  READY = "ready",
  EXPIRED = "expired",
  EXPIRING = "expiring",
  REMOVED = "removed",
  REMOVING = "removing",
  ERROR = "error",
}
|
||||
|
||||
/** Possible states of a conference. */
export type ConferenceStatus = "ready" | "expired" | "removed";

/** Availability of a source. */
export type SourceStatus = "available" | "unavailable";
|
||||
|
||||
/** A node of a file tree: either a folder (`Tree`) or a file (`TreeFile`). */
export type TreeElement = Tree | TreeFile;

/** A folder: maps each entry name to its sub-folder or file. */
export interface Tree {
  [key: string]: TreeElement;
}

/** A file leaf of the tree. */
export interface TreeFile {
  /** Identifier of the content (git sha, S3 ETag or inode depending on the producer) */
  sha: string;
  /** Size of the file as reported by the producer */
  size: number;
}
|
||||
Reference in New Issue
Block a user