mirror of
https://github.com/tdurieux/anonymous_github.git
synced 2026-02-12 18:32:44 +00:00
refactor: uncouple repository class & token
This commit is contained in:
@@ -4,18 +4,11 @@ import { Readable } from "stream";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import Repository from "./Repository";
|
||||
import { RepositoryStatus, Tree, TreeElement, TreeFile } from "./types";
|
||||
import storage from "./storage";
|
||||
import config from "../config";
|
||||
import {
|
||||
anonymizePath,
|
||||
AnonymizeTransformer,
|
||||
isTextFile,
|
||||
} from "./anonymize-utils";
|
||||
import { anonymizePath, isTextFile } from "./anonymize-utils";
|
||||
import AnonymousError from "./AnonymousError";
|
||||
import { handleError } from "./routes/route-utils";
|
||||
import { lookup } from "mime-types";
|
||||
import { FILE_TYPE } from "./storage/Storage";
|
||||
import GitHubBase from "./source/GitHubBase";
|
||||
|
||||
/**
|
||||
* Represents a file in an anonymized repository
|
||||
|
||||
@@ -39,7 +39,7 @@ export default class AnonymousError extends CustomError {
|
||||
} else if (this.value instanceof User) {
|
||||
detail = `${this.value.username}`;
|
||||
} else if (this.value instanceof GitHubBase) {
|
||||
detail = `${this.value.githubRepository.fullName}`;
|
||||
detail = `GHDownload ${this.value.data.repoId}`;
|
||||
}
|
||||
out += this.message;
|
||||
if (detail) {
|
||||
|
||||
57
src/GitHubUtils.ts
Normal file
57
src/GitHubUtils.ts
Normal file
@@ -0,0 +1,57 @@
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import Repository from "./Repository";
|
||||
import { Octokit } from "@octokit/rest";
|
||||
import UserModel from "./database/users/users.model";
|
||||
import config from "../config";
|
||||
|
||||
/**
 * Build an Octokit client authenticated with the given token.
 *
 * The client is configured to send its requests through the `fetch`
 * binding that is in scope in this module.
 *
 * @param token GitHub access token, passed to Octokit as `auth`
 * @returns a new Octokit instance
 */
export function octokit(token: string) {
  const clientOptions = {
    auth: token,
    request: { fetch },
  };
  return new Octokit(clientOptions);
}
|
||||
|
||||
/**
 * Check whether a GitHub token is usable by requesting the authenticated user.
 *
 * @param token GitHub access token to test
 * @returns `true` when the request succeeds, `false` when it throws for any
 *          reason (the error itself is discarded)
 */
export async function checkToken(token: string) {
  const client = octokit(token);
  let usable: boolean;
  try {
    await client.users.getAuthenticated();
    usable = true;
  } catch {
    // Any failure is reported to the caller as "token not usable".
    usable = false;
  }
  return usable;
}
|
||||
|
||||
/**
 * Resolve the GitHub token to use for a repository.
 *
 * Candidates are tried in this order, and the first one accepted by
 * `checkToken` is returned:
 *  1. the access token stored on the repository source;
 *  2. the owner's GitHub token, loaded through `UserModel.findById` when the
 *     owner model does not already carry one.
 * When neither is accepted, `config.GITHUB_TOKEN` is returned.
 *
 * Side effect: access tokens loaded from the database are assigned to
 * `repository.owner.model.accessTokens`.
 *
 * @param repository repository whose source and owner tokens are inspected
 * @returns the selected token
 */
export async function getToken(repository: Repository) {
  const span = trace.getTracer("ano-file").startSpan("GHUtils.getToken");
  span.setAttribute("repoId", repository.repoId);
  try {
    // 1. Token saved with the repository source.
    const sourceToken = repository.model.source.accessToken;
    if (sourceToken && (await checkToken(sourceToken))) {
      return sourceToken;
    }

    // 2. Owner token; fetch it only when the in-memory owner model lacks one.
    const ownerModel = repository.owner.model;
    if (!ownerModel.accessTokens?.github) {
      const ownerDocument = await UserModel.findById(repository.owner.id, {
        accessTokens: 1,
      });
      const loadedTokens = ownerDocument?.accessTokens;
      if (loadedTokens) {
        ownerModel.accessTokens = loadedTokens;
      }
    }
    const ownerToken = ownerModel.accessTokens?.github;
    if (ownerToken && (await checkToken(ownerToken))) {
      return ownerToken;
    }

    // 3. Fall back to the token from the application configuration.
    return config.GITHUB_TOKEN;
  } finally {
    // The span is closed on every exit path, including thrown errors.
    span.end();
  }
}
|
||||
@@ -1,6 +1,5 @@
|
||||
import { RepositoryStatus, Source, Tree, TreeElement, TreeFile } from "./types";
|
||||
import { RepositoryStatus } from "./types";
|
||||
import User from "./User";
|
||||
import { anonymizeContent, anonymizePath } from "./anonymize-utils";
|
||||
import UserModel from "./database/users/users.model";
|
||||
import Conference from "./Conference";
|
||||
import ConferenceModel from "./database/conference/conferences.model";
|
||||
@@ -8,7 +7,8 @@ import AnonymousError from "./AnonymousError";
|
||||
import { IAnonymizedPullRequestDocument } from "./database/anonymizedPullRequests/anonymizedPullRequests.types";
|
||||
import config from "../config";
|
||||
import got from "got";
|
||||
import GitHubBase from "./source/GitHubBase";
|
||||
import { octokit } from "./GitHubUtils";
|
||||
import { ContentAnonimizer } from "./anonymize-utils";
|
||||
|
||||
export default class PullRequest {
|
||||
private _model: IAnonymizedPullRequestDocument;
|
||||
@@ -52,26 +52,23 @@ export default class PullRequest {
|
||||
"[INFO] Downloading pull request",
|
||||
this._model.source.pullRequestId
|
||||
);
|
||||
const octokit = GitHubBase.octokit(await this.getToken());
|
||||
const oct = octokit(await this.getToken());
|
||||
|
||||
const [owner, repo] = this._model.source.repositoryFullName.split("/");
|
||||
const pull_number = this._model.source.pullRequestId;
|
||||
|
||||
const [prInfo, comments, diff] = await Promise.all([
|
||||
octokit.rest.pulls.get({
|
||||
oct.rest.pulls.get({
|
||||
owner,
|
||||
repo,
|
||||
pull_number,
|
||||
}),
|
||||
octokit.paginate(
|
||||
"GET /repos/{owner}/{repo}/issues/{issue_number}/comments",
|
||||
{
|
||||
owner: owner,
|
||||
repo: repo,
|
||||
issue_number: pull_number,
|
||||
per_page: 100,
|
||||
}
|
||||
),
|
||||
oct.paginate("GET /repos/{owner}/{repo}/issues/{issue_number}/comments", {
|
||||
owner: owner,
|
||||
repo: repo,
|
||||
issue_number: pull_number,
|
||||
per_page: 100,
|
||||
}),
|
||||
got(`https://github.com/${owner}/${repo}/pull/${pull_number}.diff`),
|
||||
]);
|
||||
|
||||
@@ -250,18 +247,22 @@ export default class PullRequest {
|
||||
state: this._model.pullRequest.state,
|
||||
draft: this._model.pullRequest.draft,
|
||||
};
|
||||
const anonymizer = new ContentAnonimizer({
|
||||
...this.options,
|
||||
repoId: this.pullRequestId,
|
||||
});
|
||||
if (this.options.title) {
|
||||
output.title = anonymizeContent(this._model.pullRequest.title, this);
|
||||
output.title = anonymizer.anonymize(this._model.pullRequest.title);
|
||||
}
|
||||
if (this.options.body) {
|
||||
output.body = anonymizeContent(this._model.pullRequest.body, this);
|
||||
output.body = anonymizer.anonymize(this._model.pullRequest.body);
|
||||
}
|
||||
if (this.options.comments) {
|
||||
output.comments = this._model.pullRequest.comments?.map((comment) => {
|
||||
const o: any = {};
|
||||
if (this.options.body) o.body = anonymizeContent(comment.body, this);
|
||||
if (this.options.body) o.body = anonymizer.anonymize(comment.body);
|
||||
if (this.options.username)
|
||||
o.author = anonymizeContent(comment.author, this);
|
||||
o.author = anonymizer.anonymize(comment.author);
|
||||
if (this.options.date) {
|
||||
o.updatedDate = comment.updatedDate;
|
||||
o.creationDate = comment.creationDate;
|
||||
@@ -270,7 +271,7 @@ export default class PullRequest {
|
||||
});
|
||||
}
|
||||
if (this.options.diff) {
|
||||
output.diff = anonymizeContent(this._model.pullRequest.diff, this);
|
||||
output.diff = anonymizer.anonymize(this._model.pullRequest.diff);
|
||||
}
|
||||
if (this.options.origin) {
|
||||
output.baseRepositoryFullName =
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import storage from "./storage";
|
||||
import { RepositoryStatus, Source, Tree, TreeElement, TreeFile } from "./types";
|
||||
import { RepositoryStatus, Tree, TreeElement, TreeFile } from "./types";
|
||||
import { Readable } from "stream";
|
||||
import User from "./User";
|
||||
import GitHubStream from "./source/GitHubStream";
|
||||
@@ -16,9 +16,9 @@ import AnonymousError from "./AnonymousError";
|
||||
import { downloadQueue } from "./queue";
|
||||
import { isConnected } from "./database/database";
|
||||
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
|
||||
import { getRepositoryFromGitHub } from "./source/GitHubRepository";
|
||||
import config from "../config";
|
||||
import { GitHubRepository } from "./source/GitHubRepository";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import { getToken } from "./GitHubUtils";
|
||||
|
||||
function anonymizeTreeRecursive(
|
||||
tree: TreeElement,
|
||||
@@ -48,38 +48,55 @@ function anonymizeTreeRecursive(
|
||||
|
||||
export default class Repository {
|
||||
private _model: IAnonymizedRepositoryDocument;
|
||||
source: Source;
|
||||
owner: User;
|
||||
|
||||
constructor(data: IAnonymizedRepositoryDocument) {
|
||||
this._model = data;
|
||||
switch (data.source.type) {
|
||||
this.owner = new User(new UserModel({ _id: data.owner }));
|
||||
this.owner = new User(new UserModel({ _id: data.owner }));
|
||||
this.owner.model.isNew = false;
|
||||
}
|
||||
|
||||
private checkedToken: boolean = false;
|
||||
|
||||
private async getToken() {
|
||||
if (this.checkedToken) return this._model.source.accessToken as string;
|
||||
const originalToken = this._model.source.accessToken;
|
||||
const token = await getToken(this);
|
||||
if (originalToken != token) {
|
||||
this._model.source.accessToken = token;
|
||||
await this._model.save();
|
||||
}
|
||||
this.checkedToken = true;
|
||||
return token;
|
||||
}
|
||||
|
||||
get source() {
|
||||
switch (this.model.source.type) {
|
||||
case "GitHubDownload":
|
||||
this.source = new GitHubDownload(data.source, this.repoId);
|
||||
break;
|
||||
return new GitHubDownload({
|
||||
repoId: this.repoId,
|
||||
commit: this.model.source.commit || "HEAD",
|
||||
organization: "",
|
||||
repoName: this.model.source.repositoryName || "",
|
||||
getToken: () => this.getToken(),
|
||||
});
|
||||
case "GitHubStream":
|
||||
this.source = new GitHubStream(data.source);
|
||||
break;
|
||||
return new GitHubStream({
|
||||
repoId: this.repoId,
|
||||
commit: this.model.source.commit || "HEAD",
|
||||
organization: "",
|
||||
repoName: this.model.source.repositoryName || "",
|
||||
getToken: () => this.getToken(),
|
||||
});
|
||||
case "Zip":
|
||||
this.source = new Zip(data.source, this.repoId);
|
||||
break;
|
||||
return new Zip(this.model.source, this.repoId);
|
||||
default:
|
||||
throw new AnonymousError("unsupported_source", {
|
||||
object: data.source.type,
|
||||
object: this,
|
||||
httpStatus: 400,
|
||||
});
|
||||
}
|
||||
this.owner = new User(new UserModel({ _id: data.owner }));
|
||||
if (this.source instanceof GitHubBase) {
|
||||
const originalToken = this._model.source.accessToken;
|
||||
this.source.getToken(this.owner.id).then((token) => {
|
||||
if (originalToken != token) {
|
||||
this._model.source.accessToken = token;
|
||||
this._model.save();
|
||||
}
|
||||
});
|
||||
}
|
||||
this.owner.model.isNew = false;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -194,8 +211,8 @@ export default class Repository {
|
||||
image: this.options.image,
|
||||
link: this.options.link,
|
||||
repoId: this.repoId,
|
||||
repoName: (this.source as GitHubBase).githubRepository?.fullName,
|
||||
branchName: (this.source as GitHubBase).branch?.name || "main",
|
||||
repoName: this.model.source.repositoryName,
|
||||
branchName: this.model.source.branch || "main",
|
||||
});
|
||||
}
|
||||
|
||||
@@ -217,16 +234,17 @@ export default class Repository {
|
||||
) {
|
||||
// Only GitHubBase sources can be updated for the moment
|
||||
if (this.source instanceof GitHubBase) {
|
||||
const token = await this.source.getToken(this.owner.id);
|
||||
const branches = await this.source.githubRepository.branches({
|
||||
const token = await this.getToken();
|
||||
const ghRepo = new GitHubRepository({});
|
||||
const branches = await ghRepo.branches({
|
||||
force: true,
|
||||
accessToken: token,
|
||||
});
|
||||
const branch = this.source.branch;
|
||||
const newCommit = branches.filter((f) => f.name == branch.name)[0]
|
||||
const branchName = this.model.source.branch || "main";
|
||||
const newCommit = branches.filter((f) => f.name == branchName)[0]
|
||||
?.commit;
|
||||
if (
|
||||
branch.commit == newCommit &&
|
||||
this.model.source.commit == newCommit &&
|
||||
this.status == RepositoryStatus.READY
|
||||
) {
|
||||
console.log(`[UPDATE] ${this._model.repoId} is up to date`);
|
||||
@@ -235,12 +253,9 @@ export default class Repository {
|
||||
return;
|
||||
}
|
||||
this._model.source.commit = newCommit;
|
||||
const commitInfo = await this.source.githubRepository.getCommitInfo(
|
||||
newCommit,
|
||||
{
|
||||
accessToken: token,
|
||||
}
|
||||
);
|
||||
const commitInfo = await ghRepo.getCommitInfo(newCommit, {
|
||||
accessToken: token,
|
||||
});
|
||||
if (
|
||||
commitInfo.commit?.author?.date ||
|
||||
commitInfo.commit?.committer?.date
|
||||
@@ -249,11 +264,11 @@ export default class Repository {
|
||||
commitInfo.commit.committer?.date) as string;
|
||||
this._model.source.commitDate = new Date(d);
|
||||
}
|
||||
branch.commit = newCommit;
|
||||
this.model.source.commit = newCommit;
|
||||
|
||||
if (!newCommit) {
|
||||
console.error(
|
||||
`${branch.name} for ${this.source.githubRepository.fullName} is not found`
|
||||
`${branchName} for ${this.model.source.repositoryName} is not found`
|
||||
);
|
||||
await this.updateStatus(RepositoryStatus.ERROR, "branch_not_found");
|
||||
await this.resetSate();
|
||||
@@ -268,23 +283,6 @@ export default class Repository {
|
||||
`[UPDATE] ${this._model.repoId} will be updated to ${newCommit}`
|
||||
);
|
||||
|
||||
if (this.source.type == "GitHubDownload") {
|
||||
const repository = await getRepositoryFromGitHub({
|
||||
accessToken: await this.source.getToken(this.owner.id),
|
||||
owner: this.source.githubRepository.owner,
|
||||
repo: this.source.githubRepository.repo,
|
||||
});
|
||||
if (
|
||||
repository.size === undefined ||
|
||||
repository.size > config.MAX_REPO_SIZE
|
||||
) {
|
||||
console.log(
|
||||
`[UPDATE] ${this._model.repoId} will be streamed instead of downloaded`
|
||||
);
|
||||
this._model.source.type = "GitHubStream";
|
||||
}
|
||||
}
|
||||
|
||||
await this.resetSate(RepositoryStatus.PREPARING);
|
||||
await downloadQueue.add(this.repoId, this, {
|
||||
jobId: this.repoId,
|
||||
@@ -513,10 +511,15 @@ export default class Repository {
|
||||
anonymizeDate: this._model.anonymizeDate,
|
||||
status: this.status,
|
||||
statusMessage: this._model.statusMessage,
|
||||
source: this.source.toJSON(),
|
||||
lastView: this._model.lastView,
|
||||
pageView: this._model.pageView,
|
||||
size: this.size,
|
||||
source: {
|
||||
fullName: this.model.source.repositoryName,
|
||||
commit: this.model.source.commit,
|
||||
branch: this.model.source.branch,
|
||||
type: this.model.source.type,
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ import { GitHubRepository } from "./source/GitHubRepository";
|
||||
import PullRequest from "./PullRequest";
|
||||
import AnonymizedPullRequestModel from "./database/anonymizedPullRequests/anonymizedPullRequests.model";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import GitHubBase from "./source/GitHubBase";
|
||||
import { octokit } from "./GitHubUtils";
|
||||
|
||||
/**
|
||||
* Model for a user
|
||||
@@ -66,9 +66,9 @@ export default class User {
|
||||
opt?.force === true
|
||||
) {
|
||||
// get the list of repo from github
|
||||
const octokit = GitHubBase.octokit(this.accessToken);
|
||||
const oct = octokit(this.accessToken);
|
||||
const repositories = (
|
||||
await octokit.paginate("GET /user/repos", {
|
||||
await oct.paginate("GET /user/repos", {
|
||||
visibility: "all",
|
||||
sort: "pushed",
|
||||
per_page: 100,
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
import config from "../config";
|
||||
import GitHubBase from "./source/GitHubBase";
|
||||
import { isText } from "istextorbinary";
|
||||
import { basename } from "path";
|
||||
import { Transform } from "stream";
|
||||
import { Readable } from "stream";
|
||||
import AnonymizedFile from "./AnonymizedFile";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
|
||||
const urlRegex =
|
||||
@@ -33,16 +31,21 @@ export function isTextFile(filePath: string, content?: Buffer) {
|
||||
}
|
||||
|
||||
export class AnonymizeTransformer extends Transform {
|
||||
public wasAnonimized = false;
|
||||
public isText: boolean | null = null;
|
||||
anonimizer: ContentAnonimizer;
|
||||
|
||||
constructor(
|
||||
private readonly opt: {
|
||||
filePath: string;
|
||||
} & ConstructorParameters<typeof ContentAnonimizer>[1]
|
||||
} & ConstructorParameters<typeof ContentAnonimizer>[0]
|
||||
) {
|
||||
super();
|
||||
this.isText = isTextFile(this.opt.filePath);
|
||||
this.anonimizer = new ContentAnonimizer(this.opt);
|
||||
}
|
||||
|
||||
get wasAnonimized() {
|
||||
return this.anonimizer.wasAnonymized;
|
||||
}
|
||||
|
||||
_transform(chunk: Buffer, encoding: string, callback: () => void) {
|
||||
@@ -55,11 +58,9 @@ export class AnonymizeTransformer extends Transform {
|
||||
}
|
||||
|
||||
if (this.isText) {
|
||||
const anonimizer = new ContentAnonimizer(chunk.toString(), this.opt);
|
||||
anonimizer.anonymize();
|
||||
if (anonimizer.wasAnonymized) {
|
||||
this.wasAnonimized = true;
|
||||
chunk = Buffer.from(anonimizer.content);
|
||||
const content = this.anonimizer.anonymize(chunk.toString());
|
||||
if (this.anonimizer.wasAnonymized) {
|
||||
chunk = Buffer.from(content);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -76,25 +77,10 @@ export class AnonymizeTransformer extends Transform {
|
||||
}
|
||||
}
|
||||
|
||||
interface Anonymizationptions {
|
||||
repoId?: string;
|
||||
source?: {};
|
||||
options: {
|
||||
terms: string[];
|
||||
image: boolean;
|
||||
link: boolean;
|
||||
pageSource?: {
|
||||
branch: string;
|
||||
path: string;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
export class ContentAnonimizer {
|
||||
public wasAnonymized = false;
|
||||
|
||||
constructor(
|
||||
public content: string,
|
||||
readonly opt: {
|
||||
image?: boolean;
|
||||
link?: boolean;
|
||||
@@ -105,12 +91,12 @@ export class ContentAnonimizer {
|
||||
}
|
||||
) {}
|
||||
|
||||
private removeImage() {
|
||||
private removeImage(content: string): string {
|
||||
if (this.opt.image !== false) {
|
||||
return;
|
||||
return content;
|
||||
}
|
||||
// remove image in markdown
|
||||
this.content = this.content.replace(
|
||||
return content.replace(
|
||||
/!\[[^\]]*\]\((?<filename>.*?)(?=\"|\))(?<optionalpart>\".*\")?\)/g,
|
||||
() => {
|
||||
this.wasAnonymized = true;
|
||||
@@ -118,20 +104,20 @@ export class ContentAnonimizer {
|
||||
}
|
||||
);
|
||||
}
|
||||
private removeLink() {
|
||||
private removeLink(content: string): string {
|
||||
if (this.opt.link !== false) {
|
||||
return;
|
||||
return content;
|
||||
}
|
||||
// remove links (every match of urlRegex)
|
||||
this.content = this.content.replace(urlRegex, () => {
|
||||
return content.replace(urlRegex, () => {
|
||||
this.wasAnonymized = true;
|
||||
return config.ANONYMIZATION_MASK;
|
||||
});
|
||||
}
|
||||
|
||||
private replaceGitHubSelfLinks() {
|
||||
private replaceGitHubSelfLinks(content: string): string {
|
||||
if (!this.opt.repoName || !this.opt.branchName) {
|
||||
return;
|
||||
return content;
|
||||
}
|
||||
const repoName = this.opt.repoName;
|
||||
const branchName = this.opt.branchName;
|
||||
@@ -140,28 +126,28 @@ export class ContentAnonimizer {
|
||||
this.wasAnonymized = true;
|
||||
return `https://${config.APP_HOSTNAME}/r/${this.opt.repoId}`;
|
||||
};
|
||||
this.content = this.content.replace(
|
||||
content = content.replace(
|
||||
new RegExp(
|
||||
`https://raw.githubusercontent.com/${repoName}/${branchName}\\b`,
|
||||
"gi"
|
||||
),
|
||||
replaceCallback
|
||||
);
|
||||
this.content = this.content.replace(
|
||||
content = content.replace(
|
||||
new RegExp(`https://github.com/${repoName}/blob/${branchName}\\b`, "gi"),
|
||||
replaceCallback
|
||||
);
|
||||
this.content = this.content.replace(
|
||||
content = content.replace(
|
||||
new RegExp(`https://github.com/${repoName}/tree/${branchName}\\b`, "gi"),
|
||||
replaceCallback
|
||||
);
|
||||
this.content = this.content.replace(
|
||||
return content.replace(
|
||||
new RegExp(`https://github.com/${repoName}`, "gi"),
|
||||
replaceCallback
|
||||
);
|
||||
}
|
||||
|
||||
private replaceTerms() {
|
||||
private replaceTerms(content: string): string {
|
||||
const terms = this.opt.terms || [];
|
||||
for (let i = 0; i < terms.length; i++) {
|
||||
let term = terms[i];
|
||||
@@ -176,7 +162,7 @@ export class ContentAnonimizer {
|
||||
term = term.replace(/[-[\]{}()*+?.,\\^$|#]/g, "\\$&");
|
||||
}
|
||||
// remove whole url if it contains the term
|
||||
this.content = this.content.replace(urlRegex, (match) => {
|
||||
content = content.replace(urlRegex, (match) => {
|
||||
if (new RegExp(`\\b${term}\\b`, "gi").test(match)) {
|
||||
this.wasAnonymized = true;
|
||||
return mask;
|
||||
@@ -185,56 +171,34 @@ export class ContentAnonimizer {
|
||||
});
|
||||
|
||||
// remove the term in the text
|
||||
this.content = this.content.replace(
|
||||
new RegExp(`\\b${term}\\b`, "gi"),
|
||||
() => {
|
||||
this.wasAnonymized = true;
|
||||
return mask;
|
||||
}
|
||||
);
|
||||
return content.replace(new RegExp(`\\b${term}\\b`, "gi"), () => {
|
||||
this.wasAnonymized = true;
|
||||
return mask;
|
||||
});
|
||||
}
|
||||
return content;
|
||||
}
|
||||
|
||||
anonymize() {
|
||||
anonymize(content: string) {
|
||||
const span = trace
|
||||
.getTracer("ano-file")
|
||||
.startSpan("ContentAnonimizer.anonymize");
|
||||
try {
|
||||
this.removeImage();
|
||||
content = this.removeImage(content);
|
||||
span.addEvent("removeImage");
|
||||
this.removeLink();
|
||||
content = this.removeLink(content);
|
||||
span.addEvent("removeLink");
|
||||
this.replaceGitHubSelfLinks();
|
||||
content = this.replaceGitHubSelfLinks(content);
|
||||
span.addEvent("replaceGitHubSelfLinks");
|
||||
this.replaceTerms();
|
||||
content = this.replaceTerms(content);
|
||||
span.addEvent("replaceTerms");
|
||||
return this.content;
|
||||
return content;
|
||||
} finally {
|
||||
span.end();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export function anonymizeContent(
|
||||
content: string,
|
||||
repository: Anonymizationptions
|
||||
) {
|
||||
let repoName: string | undefined;
|
||||
let branchName: string | undefined;
|
||||
if (repository.source instanceof GitHubBase) {
|
||||
repoName = repository.source.githubRepository.fullName;
|
||||
branchName = repository.source.branch.name;
|
||||
}
|
||||
return new ContentAnonimizer(content, {
|
||||
repoId: repository.repoId,
|
||||
image: repository.options.image,
|
||||
link: repository.options.link,
|
||||
terms: repository.options.terms,
|
||||
repoName,
|
||||
branchName,
|
||||
}).anonymize();
|
||||
}
|
||||
|
||||
export function anonymizePath(path: string, terms: string[]) {
|
||||
return trace
|
||||
.getTracer("ano-file")
|
||||
|
||||
@@ -3,7 +3,6 @@ import Repository from "../Repository";
|
||||
import { getRepository as getRepositoryImport } from "../database/database";
|
||||
import { RepositoryStatus } from "../types";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import { Span } from "@opentelemetry/sdk-trace-node";
|
||||
|
||||
export default async function (job: SandboxedJob<Repository, void>) {
|
||||
const {
|
||||
|
||||
@@ -9,7 +9,6 @@ import Repository from "../Repository";
|
||||
import User from "../User";
|
||||
import { ensureAuthenticated } from "./connection";
|
||||
import { handleError, getUser, isOwnerOrAdmin, getRepo } from "./route-utils";
|
||||
import RepositoryModel from "../database/repositories/repositories.model";
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
|
||||
@@ -5,9 +5,7 @@ import * as db from "../database/database";
|
||||
import { getRepo, getUser, handleError, isOwnerOrAdmin } from "./route-utils";
|
||||
import { getRepositoryFromGitHub } from "../source/GitHubRepository";
|
||||
import gh = require("parse-github-url");
|
||||
import GitHubBase from "../source/GitHubBase";
|
||||
import AnonymizedRepositoryModel from "../database/anonymizedRepositories/anonymizedRepositories.model";
|
||||
import config from "../../config";
|
||||
import { IAnonymizedRepositoryDocument } from "../database/anonymizedRepositories/anonymizedRepositories.types";
|
||||
import Repository from "../Repository";
|
||||
import UserModel from "../database/users/users.model";
|
||||
@@ -18,6 +16,7 @@ import RepositoryModel from "../database/repositories/repositories.model";
|
||||
import User from "../User";
|
||||
import { RepositoryStatus } from "../types";
|
||||
import { IUserDocument } from "../database/users/users.types";
|
||||
import { checkToken } from "../GitHubUtils";
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
@@ -41,7 +40,7 @@ async function getTokenForAdmin(user: User, req: express.Request) {
|
||||
});
|
||||
const user: IUserDocument = existingRepo?.owner as any;
|
||||
if (user instanceof UserModel) {
|
||||
const check = await GitHubBase.checkToken(user.accessTokens.github);
|
||||
const check = await checkToken(user.accessTokens.github);
|
||||
if (check) {
|
||||
return user.accessTokens.github;
|
||||
}
|
||||
@@ -100,7 +99,7 @@ router.post("/claim", async (req: express.Request, res: express.Response) => {
|
||||
}
|
||||
|
||||
const dbRepo = await RepositoryModel.findById(
|
||||
(repoConfig.source as GitHubBase).githubRepository.id
|
||||
repoConfig.model.source.repositoryId
|
||||
);
|
||||
|
||||
if (!dbRepo || dbRepo.externalId != repo.id) {
|
||||
|
||||
@@ -86,9 +86,9 @@ router.get(
|
||||
if (
|
||||
repo.status == "expired" &&
|
||||
repo.options.expirationMode == "redirect" &&
|
||||
repo.source.url
|
||||
repo.model.source.repositoryName
|
||||
) {
|
||||
redirectURL = repo.source.url;
|
||||
redirectURL = `https://github.com/${repo.model.source.repositoryName}`;
|
||||
} else {
|
||||
if (
|
||||
repo.status == "expired" ||
|
||||
|
||||
@@ -51,9 +51,9 @@ export async function getRepo(
|
||||
if (
|
||||
repo.status == "expired" &&
|
||||
repo.options.expirationMode == "redirect" &&
|
||||
repo.source.url
|
||||
repo.model.source.repositoryId
|
||||
) {
|
||||
res.redirect(repo.source.url);
|
||||
res.redirect(`https://github.com/${repo.model.source.repositoryName}`);
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@ import * as express from "express";
|
||||
import { getRepo, handleError } from "./route-utils";
|
||||
import * as path from "path";
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import GitHubDownload from "../source/GitHubDownload";
|
||||
import AnonymousError from "../AnonymousError";
|
||||
import { Tree, TreeElement } from "../types";
|
||||
import * as marked from "marked";
|
||||
@@ -36,10 +35,7 @@ async function webView(req: express.Request, res: express.Response) {
|
||||
});
|
||||
}
|
||||
|
||||
if (
|
||||
repo.options.pageSource?.branch !=
|
||||
(repo.source as GitHubDownload).branch.name
|
||||
) {
|
||||
if (repo.options.pageSource?.branch != repo.model.source.branch) {
|
||||
throw new AnonymousError("page_not_supported_on_different_branch", {
|
||||
httpStatus: 400,
|
||||
object: repo,
|
||||
|
||||
@@ -1,39 +1,36 @@
|
||||
import { Octokit } from "@octokit/rest";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import { Readable } from "stream";
|
||||
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import { Branch, Tree } from "../types";
|
||||
import { GitHubRepository } from "./GitHubRepository";
|
||||
import config from "../../config";
|
||||
import UserModel from "../database/users/users.model";
|
||||
import { Tree } from "../types";
|
||||
|
||||
export default abstract class GitHubBase {
|
||||
/**
 * Data handed to a GitHub-backed source when it is constructed.
 */
export interface GitHubBaseData {
  /** Supplies the GitHub access token, either directly or as a promise. */
  getToken: () => string | Promise<string>;
  /** Identifier of the anonymized repository. */
  repoId: string;
  /** Passed as `owner` in GitHub API calls. */
  organization: string;
  /** Passed as `repo` in GitHub API calls. */
  repoName: string;
  /** Git ref of the snapshot to fetch. */
  commit: string;
}
|
||||
|
||||
export interface SourceBase {
|
||||
readonly type: string;
|
||||
|
||||
/**
|
||||
* Retrieve the file content
|
||||
* @param file the file of the content to retrieve
|
||||
*/
|
||||
getFileContent(file: AnonymizedFile): Promise<Readable>;
|
||||
|
||||
/**
|
||||
* Get all the files from a specific source
|
||||
*/
|
||||
getFiles(progress?: (status: string) => void): Promise<Tree>;
|
||||
}
|
||||
|
||||
export default abstract class GitHubBase implements SourceBase {
|
||||
abstract type: "GitHubDownload" | "GitHubStream" | "Zip";
|
||||
githubRepository: GitHubRepository;
|
||||
branch: Branch;
|
||||
accessToken: string | undefined;
|
||||
validToken: boolean = false;
|
||||
|
||||
constructor(data: {
|
||||
accessToken?: string;
|
||||
commit?: string;
|
||||
branch?: string;
|
||||
repositoryId?: string;
|
||||
repositoryName?: string;
|
||||
}) {
|
||||
this.accessToken = data.accessToken;
|
||||
const branches = [];
|
||||
if (data.branch && data.commit) {
|
||||
branches.push({ commit: data.commit, name: data.branch });
|
||||
}
|
||||
this.githubRepository = new GitHubRepository({
|
||||
name: data.repositoryName,
|
||||
externalId: data.repositoryId,
|
||||
branches,
|
||||
});
|
||||
this.branch = branches[0];
|
||||
}
|
||||
constructor(readonly data: GitHubBaseData) {}
|
||||
|
||||
abstract getFileContent(
|
||||
file: AnonymizedFile,
|
||||
@@ -41,69 +38,4 @@ export default abstract class GitHubBase {
|
||||
): Promise<Readable>;
|
||||
|
||||
abstract getFiles(progress?: (status: string) => void): Promise<Tree>;
|
||||
|
||||
static octokit(token: string) {
|
||||
return new Octokit({
|
||||
auth: token,
|
||||
request: {
|
||||
fetch: fetch,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
static async checkToken(token: string) {
|
||||
const octokit = GitHubBase.octokit(token);
|
||||
try {
|
||||
await octokit.users.getAuthenticated();
|
||||
return true;
|
||||
} catch (error) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
async getToken(ownerID?: any) {
|
||||
const span = trace.getTracer("ano-file").startSpan("GHBase.getToken");
|
||||
span.setAttribute("repoId", this.githubRepository.fullName || "");
|
||||
try {
|
||||
if (this.validToken) {
|
||||
return this.accessToken as string;
|
||||
}
|
||||
if (ownerID) {
|
||||
const user = await UserModel.findById(ownerID, {
|
||||
accessTokens: 1,
|
||||
});
|
||||
if (user?.accessTokens.github) {
|
||||
const check = await GitHubBase.checkToken(user.accessTokens.github);
|
||||
if (check) {
|
||||
this.accessToken = user.accessTokens.github;
|
||||
this.validToken = true;
|
||||
return this.accessToken;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (this.accessToken) {
|
||||
if (await GitHubBase.checkToken(this.accessToken)) {
|
||||
this.validToken = true;
|
||||
return this.accessToken;
|
||||
}
|
||||
}
|
||||
this.accessToken = config.GITHUB_TOKEN;
|
||||
return this.accessToken;
|
||||
} finally {
|
||||
span.end();
|
||||
}
|
||||
}
|
||||
|
||||
get url() {
|
||||
return "https://github.com/" + this.githubRepository.fullName;
|
||||
}
|
||||
|
||||
toJSON(): any {
|
||||
return {
|
||||
type: this.type,
|
||||
fullName: this.githubRepository.fullName?.toString(),
|
||||
branch: this.branch?.name,
|
||||
commit: this.branch?.commit,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,56 +3,45 @@ import { Readable } from "stream";
|
||||
import { OctokitResponse } from "@octokit/types";
|
||||
|
||||
import storage from "../storage";
|
||||
import GitHubBase from "./GitHubBase";
|
||||
import GitHubBase, { GitHubBaseData } from "./GitHubBase";
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import { SourceBase } from "../types";
|
||||
import AnonymousError from "../AnonymousError";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import { FILE_TYPE } from "../storage/Storage";
|
||||
import { octokit } from "../GitHubUtils";
|
||||
|
||||
export default class GitHubDownload extends GitHubBase implements SourceBase {
|
||||
export default class GitHubDownload extends GitHubBase {
|
||||
type: "GitHubDownload" | "GitHubStream" | "Zip" = "GitHubDownload";
|
||||
constructor(
|
||||
data: {
|
||||
branch?: string;
|
||||
commit?: string;
|
||||
repositoryId?: string;
|
||||
repositoryName?: string;
|
||||
accessToken?: string;
|
||||
},
|
||||
readonly repoId: string
|
||||
) {
|
||||
constructor(data: GitHubBaseData) {
|
||||
super(data);
|
||||
}
|
||||
|
||||
private async _getZipUrl(
|
||||
auth: string
|
||||
): Promise<OctokitResponse<unknown, 302>> {
|
||||
const octokit = GitHubBase.octokit(auth as string);
|
||||
return octokit.rest.repos.downloadZipballArchive({
|
||||
owner: this.githubRepository.owner,
|
||||
repo: this.githubRepository.repo,
|
||||
ref: this.branch?.commit || "HEAD",
|
||||
private async _getZipUrl(): Promise<OctokitResponse<unknown, 302>> {
|
||||
const oct = octokit(await this.data.getToken());
|
||||
return oct.rest.repos.downloadZipballArchive({
|
||||
owner: this.data.organization,
|
||||
repo: this.data.repoName,
|
||||
ref: this.data.commit || "HEAD",
|
||||
method: "HEAD",
|
||||
});
|
||||
}
|
||||
|
||||
async download(token: string, progress?: (status: string) => void) {
|
||||
async download(progress?: (status: string) => void) {
|
||||
const span = trace.getTracer("ano-file").startSpan("GHDownload.download");
|
||||
span.setAttribute("repoId", this.githubRepository.fullName || "");
|
||||
span.setAttribute("repoId", this.data.repoId);
|
||||
try {
|
||||
let response: OctokitResponse<unknown, number>;
|
||||
try {
|
||||
response = await this._getZipUrl(token);
|
||||
response = await this._getZipUrl();
|
||||
} catch (error) {
|
||||
span.recordException(error as Error);
|
||||
throw new AnonymousError("repo_not_accessible", {
|
||||
httpStatus: 404,
|
||||
object: this.githubRepository,
|
||||
object: this.data,
|
||||
cause: error as Error,
|
||||
});
|
||||
}
|
||||
await storage.mk(this.repoId);
|
||||
await storage.mk(this.data.repoId);
|
||||
let downloadProgress: { transferred: number } | undefined = undefined;
|
||||
let progressTimeout;
|
||||
let inDownload = true;
|
||||
@@ -73,18 +62,17 @@ export default class GitHubDownload extends GitHubBase implements SourceBase {
|
||||
downloadProgress = p;
|
||||
});
|
||||
await storage.extractZip(
|
||||
this.repoId,
|
||||
this.data.repoId,
|
||||
"",
|
||||
downloadStream,
|
||||
undefined,
|
||||
this
|
||||
this.type
|
||||
);
|
||||
} catch (error) {
|
||||
span.recordException(error as Error);
|
||||
throw new AnonymousError("unable_to_download", {
|
||||
httpStatus: 500,
|
||||
cause: error as Error,
|
||||
object: this.githubRepository,
|
||||
object: this.data,
|
||||
});
|
||||
} finally {
|
||||
inDownload = false;
|
||||
@@ -102,11 +90,11 @@ export default class GitHubDownload extends GitHubBase implements SourceBase {
|
||||
const span = trace
|
||||
.getTracer("ano-file")
|
||||
.startSpan("GHDownload.getFileContent");
|
||||
span.setAttribute("repoId", this.githubRepository.fullName || "");
|
||||
span.setAttribute("repoId", file.repository.repoId);
|
||||
try {
|
||||
const exists = await storage.exists(file.filePath);
|
||||
if (exists === FILE_TYPE.FILE) {
|
||||
return storage.read(this.repoId, file.filePath);
|
||||
return storage.read(this.data.repoId, file.filePath);
|
||||
} else if (exists === FILE_TYPE.FOLDER) {
|
||||
throw new AnonymousError("folder_not_supported", {
|
||||
httpStatus: 400,
|
||||
@@ -117,20 +105,17 @@ export default class GitHubDownload extends GitHubBase implements SourceBase {
|
||||
await file.originalPath();
|
||||
|
||||
// the cache is not ready, we need to download the repository
|
||||
await this.download(
|
||||
await this.getToken(file.repository.owner.id),
|
||||
progress
|
||||
);
|
||||
return storage.read(this.repoId, file.filePath);
|
||||
await this.download(progress);
|
||||
return storage.read(this.data.repoId, file.filePath);
|
||||
} finally {
|
||||
span.end();
|
||||
}
|
||||
}
|
||||
|
||||
async getFiles() {
|
||||
if ((await storage.exists(this.repoId)) === FILE_TYPE.NOT_FOUND) {
|
||||
await this.download(await this.getToken());
|
||||
async getFiles(progress?: (status: string) => void) {
|
||||
if ((await storage.exists(this.data.repoId)) === FILE_TYPE.NOT_FOUND) {
|
||||
await this.download(progress);
|
||||
}
|
||||
return storage.listFiles(this.repoId);
|
||||
return storage.listFiles(this.data.repoId);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ import RepositoryModel from "../database/repositories/repositories.model";
|
||||
import AnonymousError from "../AnonymousError";
|
||||
import { isConnected } from "../database/database";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import GitHubBase from "./GitHubBase";
|
||||
import { octokit } from "../GitHubUtils";
|
||||
|
||||
export class GitHubRepository {
|
||||
private _data: Partial<{
|
||||
@@ -49,7 +49,7 @@ export class GitHubRepository {
|
||||
async getCommitInfo(
|
||||
sha: string,
|
||||
opt: {
|
||||
accessToken?: string;
|
||||
accessToken: string;
|
||||
}
|
||||
) {
|
||||
const span = trace
|
||||
@@ -58,8 +58,8 @@ export class GitHubRepository {
|
||||
span.setAttribute("owner", this.owner);
|
||||
span.setAttribute("repo", this.repo);
|
||||
try {
|
||||
const octokit = GitHubBase.octokit(opt.accessToken as string);
|
||||
const commit = await octokit.repos.getCommit({
|
||||
const oct = octokit(opt.accessToken);
|
||||
const commit = await oct.repos.getCommit({
|
||||
owner: this.owner,
|
||||
repo: this.repo,
|
||||
ref: sha,
|
||||
@@ -71,7 +71,7 @@ export class GitHubRepository {
|
||||
}
|
||||
|
||||
async branches(opt: {
|
||||
accessToken?: string;
|
||||
accessToken: string;
|
||||
force?: boolean;
|
||||
}): Promise<Branch[]> {
|
||||
const span = trace.getTracer("ano-file").startSpan("GHRepository.branches");
|
||||
@@ -84,10 +84,10 @@ export class GitHubRepository {
|
||||
opt?.force === true
|
||||
) {
|
||||
// get the list of repo from github
|
||||
const octokit = GitHubBase.octokit(opt.accessToken as string);
|
||||
const oct = octokit(opt.accessToken);
|
||||
try {
|
||||
const branches = (
|
||||
await octokit.paginate("GET /repos/{owner}/{repo}/branches", {
|
||||
await oct.paginate("GET /repos/{owner}/{repo}/branches", {
|
||||
owner: this.owner,
|
||||
repo: this.repo,
|
||||
per_page: 100,
|
||||
@@ -132,7 +132,7 @@ export class GitHubRepository {
|
||||
async readme(opt: {
|
||||
branch?: string;
|
||||
force?: boolean;
|
||||
accessToken?: string;
|
||||
accessToken: string;
|
||||
}): Promise<string | undefined> {
|
||||
const span = trace.getTracer("ano-file").startSpan("GHRepository.readme");
|
||||
span.setAttribute("owner", this.owner);
|
||||
@@ -154,9 +154,9 @@ export class GitHubRepository {
|
||||
const selected = model.branches.filter((f) => f.name == opt.branch)[0];
|
||||
if (selected && (!selected.readme || opt?.force === true)) {
|
||||
// get the list of repo from github
|
||||
const octokit = GitHubBase.octokit(opt.accessToken as string);
|
||||
const oct = octokit(opt.accessToken);
|
||||
try {
|
||||
const ghRes = await octokit.repos.getReadme({
|
||||
const ghRes = await oct.repos.getReadme({
|
||||
owner: this.owner,
|
||||
repo: this.repo,
|
||||
ref: selected?.commit,
|
||||
@@ -239,11 +239,11 @@ export async function getRepositoryFromGitHub(opt: {
|
||||
if (opt.repo.indexOf(".git") > -1) {
|
||||
opt.repo = opt.repo.replace(".git", "");
|
||||
}
|
||||
const octokit = GitHubBase.octokit(opt.accessToken as string);
|
||||
const oct = octokit(opt.accessToken);
|
||||
let r: RestEndpointMethodTypes["repos"]["get"]["response"]["data"];
|
||||
try {
|
||||
r = (
|
||||
await octokit.repos.get({
|
||||
await oct.repos.get({
|
||||
owner: opt.owner,
|
||||
repo: opt.repo,
|
||||
})
|
||||
@@ -282,7 +282,7 @@ export async function getRepositoryFromGitHub(opt: {
|
||||
model.defaultBranch = r.default_branch;
|
||||
model.hasPage = r.has_pages;
|
||||
if (model.hasPage) {
|
||||
const ghPageRes = await octokit.repos.getPages({
|
||||
const ghPageRes = await oct.repos.getPages({
|
||||
owner: opt.owner,
|
||||
repo: opt.repo,
|
||||
});
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import GitHubBase from "./GitHubBase";
|
||||
import GitHubBase, { GitHubBaseData } from "./GitHubBase";
|
||||
import storage from "../storage";
|
||||
import { SourceBase, Tree } from "../types";
|
||||
import { Tree } from "../types";
|
||||
import * as path from "path";
|
||||
import got from "got";
|
||||
|
||||
@@ -10,28 +10,23 @@ import AnonymousError from "../AnonymousError";
|
||||
import config from "../../config";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import { FILE_TYPE } from "../storage/Storage";
|
||||
import { octokit } from "../GitHubUtils";
|
||||
|
||||
export default class GitHubStream extends GitHubBase implements SourceBase {
|
||||
export default class GitHubStream extends GitHubBase {
|
||||
type: "GitHubDownload" | "GitHubStream" | "Zip" = "GitHubStream";
|
||||
|
||||
constructor(data: {
|
||||
branch?: string;
|
||||
commit?: string;
|
||||
repositoryId?: string;
|
||||
repositoryName?: string;
|
||||
accessToken?: string;
|
||||
}) {
|
||||
constructor(data: GitHubBaseData) {
|
||||
super(data);
|
||||
}
|
||||
|
||||
downloadFile(sha: string, token: string) {
|
||||
downloadFile(token: string, sha: string) {
|
||||
const span = trace.getTracer("ano-file").startSpan("GHStream.downloadFile");
|
||||
span.setAttribute("sha", sha);
|
||||
const octokit = GitHubBase.octokit(token);
|
||||
const oct = octokit(token);
|
||||
try {
|
||||
const { url } = octokit.rest.git.getBlob.endpoint({
|
||||
owner: this.githubRepository.owner,
|
||||
repo: this.githubRepository.repo,
|
||||
const { url } = oct.rest.git.getBlob.endpoint({
|
||||
owner: this.data.organization,
|
||||
repo: this.data.repoName,
|
||||
file_sha: sha,
|
||||
});
|
||||
return got.stream(url, {
|
||||
@@ -46,7 +41,7 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
|
||||
// span.recordException(error as Error);
|
||||
throw new AnonymousError("repo_not_accessible", {
|
||||
httpStatus: 404,
|
||||
object: this.githubRepository,
|
||||
object: this.data,
|
||||
cause: error as Error,
|
||||
});
|
||||
} finally {
|
||||
@@ -88,8 +83,7 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
|
||||
});
|
||||
}
|
||||
try {
|
||||
const token = await this.getToken(file.repository.owner.id);
|
||||
const content = this.downloadFile(file_sha, token);
|
||||
const content = this.downloadFile(await this.data.getToken(), file_sha);
|
||||
|
||||
// duplicate the stream to write it to the storage
|
||||
const stream1 = content.pipe(new stream.PassThrough());
|
||||
@@ -98,8 +92,7 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
|
||||
file.repository.repoId,
|
||||
file.filePath,
|
||||
stream1,
|
||||
file,
|
||||
this
|
||||
this.type
|
||||
);
|
||||
return stream2;
|
||||
} catch (error) {
|
||||
@@ -126,17 +119,15 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
|
||||
|
||||
async getFiles() {
|
||||
const span = trace.getTracer("ano-file").startSpan("GHStream.getFiles");
|
||||
span.setAttribute("repoName", this.githubRepository.fullName || "");
|
||||
span.setAttribute("repoId", this.data.repoId);
|
||||
try {
|
||||
let commit = this.branch?.commit;
|
||||
return this.getTree(await this.getToken(), commit);
|
||||
return this.getTree(this.data.commit);
|
||||
} finally {
|
||||
span.end();
|
||||
}
|
||||
}
|
||||
|
||||
private async getTree(
|
||||
token: string,
|
||||
sha: string,
|
||||
truncatedTree: Tree = {},
|
||||
parentPath: string = "",
|
||||
@@ -146,13 +137,12 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
|
||||
}
|
||||
) {
|
||||
const span = trace.getTracer("ano-file").startSpan("GHStream.getTree");
|
||||
span.setAttribute("repoName", this.githubRepository.fullName || "");
|
||||
span.setAttribute("sha", sha);
|
||||
|
||||
let ghRes: Awaited<ReturnType<typeof this.getGHTree>>;
|
||||
try {
|
||||
count.request++;
|
||||
ghRes = await this.getGHTree(token, sha, { recursive: true });
|
||||
ghRes = await this.getGHTree(sha, { recursive: true });
|
||||
} catch (error) {
|
||||
console.error(error);
|
||||
span.recordException(error as Error);
|
||||
@@ -165,8 +155,6 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
|
||||
httpStatus: (error as any).status,
|
||||
cause: error as Error,
|
||||
object: {
|
||||
owner: this.githubRepository.owner,
|
||||
repo: this.githubRepository.repo,
|
||||
tree_sha: sha,
|
||||
},
|
||||
});
|
||||
@@ -178,24 +166,20 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
|
||||
const tree = this.tree2Tree(ghRes.tree, truncatedTree, parentPath);
|
||||
count.file += ghRes.tree.length;
|
||||
if (ghRes.truncated) {
|
||||
await this.getTruncatedTree(token, sha, tree, parentPath, count);
|
||||
await this.getTruncatedTree(sha, tree, parentPath, count);
|
||||
}
|
||||
span.end();
|
||||
return tree;
|
||||
}
|
||||
|
||||
private async getGHTree(
|
||||
token: string,
|
||||
sha: string,
|
||||
opt = { recursive: true }
|
||||
) {
|
||||
private async getGHTree(sha: string, opt = { recursive: true }) {
|
||||
const span = trace.getTracer("ano-file").startSpan("GHStream.getGHTree");
|
||||
span.setAttribute("sha", sha);
|
||||
try {
|
||||
const octokit = GitHubBase.octokit(token);
|
||||
const ghRes = await octokit.git.getTree({
|
||||
owner: this.githubRepository.owner,
|
||||
repo: this.githubRepository.repo,
|
||||
const oct = octokit(await this.data.getToken());
|
||||
const ghRes = await oct.git.getTree({
|
||||
owner: this.data.organization,
|
||||
repo: this.data.repoName,
|
||||
tree_sha: sha,
|
||||
recursive: opt.recursive ? "1" : undefined,
|
||||
});
|
||||
@@ -206,7 +190,6 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
|
||||
}
|
||||
|
||||
private async getTruncatedTree(
|
||||
token: string,
|
||||
sha: string,
|
||||
truncatedTree: Tree = {},
|
||||
parentPath: string = "",
|
||||
@@ -226,7 +209,9 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
|
||||
let data = null;
|
||||
|
||||
try {
|
||||
data = await this.getGHTree(token, sha, { recursive: false });
|
||||
data = await this.getGHTree(sha, {
|
||||
recursive: false,
|
||||
});
|
||||
this.tree2Tree(data.tree, truncatedTree, parentPath);
|
||||
} catch (error) {
|
||||
span.recordException(error as Error);
|
||||
@@ -241,7 +226,6 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
|
||||
const elementPath = path.join(parentPath, file.path);
|
||||
promises.push(
|
||||
this.getTruncatedTree(
|
||||
token,
|
||||
file.sha,
|
||||
truncatedTree,
|
||||
elementPath,
|
||||
@@ -254,7 +238,9 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
|
||||
await Promise.all(promises);
|
||||
} else {
|
||||
try {
|
||||
const data = await this.getGHTree(token, sha, { recursive: true });
|
||||
const data = await this.getGHTree(sha, {
|
||||
recursive: true,
|
||||
});
|
||||
this.tree2Tree(data.tree, truncatedTree, parentPath);
|
||||
if (data.truncated) {
|
||||
// TODO: TRUNCATED
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import storage from "../storage";
|
||||
import { SourceBase } from "../types";
|
||||
import * as stream from "stream";
|
||||
import { SourceBase } from "./GitHubBase";
|
||||
|
||||
export default class Zip implements SourceBase {
|
||||
type = "Zip";
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import { SourceBase, Tree } from "../types";
|
||||
import { Tree } from "../types";
|
||||
import config from "../../config";
|
||||
import { Stream } from "node:stream";
|
||||
import * as fs from "fs";
|
||||
import { Extract } from "unzip-stream";
|
||||
import { join, basename, dirname } from "path";
|
||||
@@ -8,7 +7,6 @@ import { Response } from "express";
|
||||
import { Readable, pipeline, Transform } from "stream";
|
||||
import * as archiver from "archiver";
|
||||
import { promisify } from "util";
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import { lookup } from "mime-types";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import StorageBase, { FILE_TYPE } from "./Storage";
|
||||
@@ -78,9 +76,7 @@ export default class FileSystem extends StorageBase {
|
||||
async write(
|
||||
repoId: string,
|
||||
p: string,
|
||||
data: string | Readable,
|
||||
file?: AnonymizedFile,
|
||||
source?: SourceBase
|
||||
data: string | Readable
|
||||
): Promise<void> {
|
||||
const span = trace.getTracer("ano-file").startSpan("fs.write");
|
||||
const fullPath = join(config.FOLDER, this.repoPath(repoId), p);
|
||||
@@ -170,13 +166,7 @@ export default class FileSystem extends StorageBase {
|
||||
}
|
||||
|
||||
/** @override */
|
||||
async extractZip(
|
||||
repoId: string,
|
||||
p: string,
|
||||
data: Readable,
|
||||
file?: AnonymizedFile,
|
||||
source?: SourceBase
|
||||
): Promise<void> {
|
||||
async extractZip(repoId: string, p: string, data: Readable): Promise<void> {
|
||||
const pipe = promisify(pipeline);
|
||||
const fullPath = join(config.FOLDER, this.repoPath(repoId), p);
|
||||
return pipe(
|
||||
|
||||
@@ -14,9 +14,8 @@ import { contentType } from "mime-types";
|
||||
import * as archiver from "archiver";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import { dirname, basename, join } from "path";
|
||||
import { SourceBase, Tree, TreeFile } from "../types";
|
||||
import { Tree, TreeFile } from "../types";
|
||||
import AnonymousError from "../AnonymousError";
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import StorageBase, { FILE_TYPE } from "./Storage";
|
||||
|
||||
export default class S3Storage extends StorageBase {
|
||||
@@ -205,8 +204,7 @@ export default class S3Storage extends StorageBase {
|
||||
repoId: string,
|
||||
path: string,
|
||||
data: string | Readable,
|
||||
file?: AnonymizedFile,
|
||||
source?: SourceBase
|
||||
source?: string
|
||||
): Promise<void> {
|
||||
const span = trace.getTracer("ano-file").startSpan("s3.rm");
|
||||
span.setAttribute("repoId", repoId);
|
||||
@@ -221,7 +219,7 @@ export default class S3Storage extends StorageBase {
|
||||
ContentType: contentType(path).toString(),
|
||||
};
|
||||
if (source) {
|
||||
params.Tagging = `source=${source.type}`;
|
||||
params.Tagging = `source=${source}`;
|
||||
}
|
||||
|
||||
const parallelUploads3 = new Upload({
|
||||
@@ -289,8 +287,7 @@ export default class S3Storage extends StorageBase {
|
||||
repoId: string,
|
||||
path: string,
|
||||
data: Readable,
|
||||
file?: AnonymizedFile,
|
||||
source?: SourceBase
|
||||
source?: string
|
||||
): Promise<void> {
|
||||
let toS3: ArchiveStreamToS3;
|
||||
const span = trace.getTracer("ano-file").startSpan("s3.extractZip");
|
||||
@@ -305,9 +302,9 @@ export default class S3Storage extends StorageBase {
|
||||
onEntry: (header) => {
|
||||
header.name = header.name.substring(header.name.indexOf("/") + 1);
|
||||
if (source) {
|
||||
header.Tagging = `source=${source.type}`;
|
||||
header.Tagging = `source=${source}`;
|
||||
header.Metadata = {
|
||||
source: source.type,
|
||||
source: source,
|
||||
};
|
||||
}
|
||||
},
|
||||
|
||||
@@ -3,8 +3,7 @@ import { Transform, Readable } from "stream";
|
||||
import * as archiver from "archiver";
|
||||
import { Response } from "express";
|
||||
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import { SourceBase, Tree } from "../types";
|
||||
import { Tree } from "../types";
|
||||
|
||||
export enum FILE_TYPE {
|
||||
FILE = "file",
|
||||
@@ -52,8 +51,7 @@ export default abstract class StorageBase {
|
||||
repoId: string,
|
||||
path: string,
|
||||
data: string | Readable,
|
||||
file?: AnonymizedFile,
|
||||
source?: SourceBase
|
||||
source?: string
|
||||
): Promise<void>;
|
||||
|
||||
/**
|
||||
@@ -73,8 +71,7 @@ export default abstract class StorageBase {
|
||||
repoId: string,
|
||||
dir: string,
|
||||
tar: Readable,
|
||||
file?: AnonymizedFile,
|
||||
source?: SourceBase
|
||||
source?: string
|
||||
): Promise<void>;
|
||||
|
||||
/**
|
||||
|
||||
24
src/types.ts
24
src/types.ts
@@ -3,30 +3,6 @@ import GitHubStream from "./source/GitHubStream";
|
||||
import Zip from "./source/Zip";
|
||||
import S3Storage from "./storage/S3";
|
||||
import FileSystem from "./storage/FileSystem";
|
||||
import AnonymizedFile from "./AnonymizedFile";
|
||||
import { Readable } from "stream";
|
||||
|
||||
export interface SourceBase {
|
||||
readonly type: string;
|
||||
|
||||
/**
|
||||
* The url of the source
|
||||
*/
|
||||
url?: string;
|
||||
|
||||
/**
|
||||
* Retrieve the fie content
|
||||
* @param file the file of the content to retrieve
|
||||
*/
|
||||
getFileContent(file: AnonymizedFile): Promise<Readable>;
|
||||
|
||||
/**
|
||||
* Get all the files from a specific source
|
||||
*/
|
||||
getFiles(): Promise<Tree>;
|
||||
|
||||
toJSON(): any;
|
||||
}
|
||||
|
||||
export type Source = GitHubDownload | GitHubStream | Zip;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user