refactor: uncouple repository class & token

This commit is contained in:
tdurieux
2024-04-02 13:51:13 +01:00
parent ea96c31e9d
commit fa2591fe38
22 changed files with 290 additions and 416 deletions

View File

@@ -4,18 +4,11 @@ import { Readable } from "stream";
import { trace } from "@opentelemetry/api";
import Repository from "./Repository";
import { RepositoryStatus, Tree, TreeElement, TreeFile } from "./types";
import storage from "./storage";
import config from "../config";
import {
anonymizePath,
AnonymizeTransformer,
isTextFile,
} from "./anonymize-utils";
import { anonymizePath, isTextFile } from "./anonymize-utils";
import AnonymousError from "./AnonymousError";
import { handleError } from "./routes/route-utils";
import { lookup } from "mime-types";
import { FILE_TYPE } from "./storage/Storage";
import GitHubBase from "./source/GitHubBase";
/**
* Represents a file in an anonymized repository

View File

@@ -39,7 +39,7 @@ export default class AnonymousError extends CustomError {
} else if (this.value instanceof User) {
detail = `${this.value.username}`;
} else if (this.value instanceof GitHubBase) {
detail = `${this.value.githubRepository.fullName}`;
detail = `GHDownload ${this.value.data.repoId}`;
}
out += this.message;
if (detail) {

57
src/GitHubUtils.ts Normal file
View File

@@ -0,0 +1,57 @@
import { trace } from "@opentelemetry/api";
import Repository from "./Repository";
import { Octokit } from "@octokit/rest";
import UserModel from "./database/users/users.model";
import config from "../config";
/**
 * Build an Octokit client authenticated with the given token.
 *
 * The global `fetch` is passed explicitly as the request implementation.
 *
 * @param token the GitHub access token used for authentication
 * @returns a new Octokit instance
 */
export function octokit(token: string) {
  const clientOptions = {
    auth: token,
    request: { fetch },
  };
  return new Octokit(clientOptions);
}
/**
 * Check whether a GitHub token is accepted by the GitHub API.
 *
 * The token is probed by requesting the authenticated user; any error raised
 * by that request is reported as an invalid token.
 *
 * @param token the GitHub access token to check
 * @returns `true` when the authenticated-user request succeeds, `false` otherwise
 */
export async function checkToken(token: string) {
  // built outside the try block so that a construction error is not swallowed
  const client = octokit(token);
  try {
    await client.users.getAuthenticated();
  } catch {
    return false;
  }
  return true;
}
/**
 * Resolve the GitHub access token to use for a repository.
 *
 * Candidates are tried in order:
 *  1. the token stored on the repository source,
 *  2. the owner's GitHub token (loaded from the database when it is not
 *     already present on the owner model),
 *  3. `config.GITHUB_TOKEN`, returned without being checked.
 *
 * A candidate from 1 or 2 is only returned when `checkToken` accepts it.
 * When the owner's token is the same string as the source token that was just
 * rejected, it is not checked a second time.
 *
 * @param repository the repository whose token should be resolved
 * @returns the first accepted token, or `config.GITHUB_TOKEN`
 */
export async function getToken(repository: Repository) {
  const span = trace.getTracer("ano-file").startSpan("GHUtils.getToken");
  span.setAttribute("repoId", repository.repoId);
  try {
    const sourceToken = repository.model.source.accessToken;
    if (sourceToken && (await checkToken(sourceToken))) {
      return sourceToken;
    }

    if (!repository.owner.model.accessTokens?.github) {
      // no GitHub token on the owner model yet: fetch only the accessTokens field
      const accessTokens = (
        await UserModel.findById(repository.owner.id, {
          accessTokens: 1,
        })
      )?.accessTokens;
      if (accessTokens) {
        repository.owner.model.accessTokens = accessTokens;
      }
    }

    const ownerToken = repository.owner.model.accessTokens?.github;
    // reaching this point means sourceToken (if any) was rejected above, so an
    // identical owner token would only cost a second API call for the same answer
    if (
      ownerToken &&
      ownerToken !== sourceToken &&
      (await checkToken(ownerToken))
    ) {
      return ownerToken;
    }

    return config.GITHUB_TOKEN;
  } finally {
    span.end();
  }
}

View File

@@ -1,6 +1,5 @@
import { RepositoryStatus, Source, Tree, TreeElement, TreeFile } from "./types";
import { RepositoryStatus } from "./types";
import User from "./User";
import { anonymizeContent, anonymizePath } from "./anonymize-utils";
import UserModel from "./database/users/users.model";
import Conference from "./Conference";
import ConferenceModel from "./database/conference/conferences.model";
@@ -8,7 +7,8 @@ import AnonymousError from "./AnonymousError";
import { IAnonymizedPullRequestDocument } from "./database/anonymizedPullRequests/anonymizedPullRequests.types";
import config from "../config";
import got from "got";
import GitHubBase from "./source/GitHubBase";
import { octokit } from "./GitHubUtils";
import { ContentAnonimizer } from "./anonymize-utils";
export default class PullRequest {
private _model: IAnonymizedPullRequestDocument;
@@ -52,26 +52,23 @@ export default class PullRequest {
"[INFO] Downloading pull request",
this._model.source.pullRequestId
);
const octokit = GitHubBase.octokit(await this.getToken());
const oct = octokit(await this.getToken());
const [owner, repo] = this._model.source.repositoryFullName.split("/");
const pull_number = this._model.source.pullRequestId;
const [prInfo, comments, diff] = await Promise.all([
octokit.rest.pulls.get({
oct.rest.pulls.get({
owner,
repo,
pull_number,
}),
octokit.paginate(
"GET /repos/{owner}/{repo}/issues/{issue_number}/comments",
{
owner: owner,
repo: repo,
issue_number: pull_number,
per_page: 100,
}
),
oct.paginate("GET /repos/{owner}/{repo}/issues/{issue_number}/comments", {
owner: owner,
repo: repo,
issue_number: pull_number,
per_page: 100,
}),
got(`https://github.com/${owner}/${repo}/pull/${pull_number}.diff`),
]);
@@ -250,18 +247,22 @@ export default class PullRequest {
state: this._model.pullRequest.state,
draft: this._model.pullRequest.draft,
};
const anonymizer = new ContentAnonimizer({
...this.options,
repoId: this.pullRequestId,
});
if (this.options.title) {
output.title = anonymizeContent(this._model.pullRequest.title, this);
output.title = anonymizer.anonymize(this._model.pullRequest.title);
}
if (this.options.body) {
output.body = anonymizeContent(this._model.pullRequest.body, this);
output.body = anonymizer.anonymize(this._model.pullRequest.body);
}
if (this.options.comments) {
output.comments = this._model.pullRequest.comments?.map((comment) => {
const o: any = {};
if (this.options.body) o.body = anonymizeContent(comment.body, this);
if (this.options.body) o.body = anonymizer.anonymize(comment.body);
if (this.options.username)
o.author = anonymizeContent(comment.author, this);
o.author = anonymizer.anonymize(comment.author);
if (this.options.date) {
o.updatedDate = comment.updatedDate;
o.creationDate = comment.creationDate;
@@ -270,7 +271,7 @@ export default class PullRequest {
});
}
if (this.options.diff) {
output.diff = anonymizeContent(this._model.pullRequest.diff, this);
output.diff = anonymizer.anonymize(this._model.pullRequest.diff);
}
if (this.options.origin) {
output.baseRepositoryFullName =

View File

@@ -1,5 +1,5 @@
import storage from "./storage";
import { RepositoryStatus, Source, Tree, TreeElement, TreeFile } from "./types";
import { RepositoryStatus, Tree, TreeElement, TreeFile } from "./types";
import { Readable } from "stream";
import User from "./User";
import GitHubStream from "./source/GitHubStream";
@@ -16,9 +16,9 @@ import AnonymousError from "./AnonymousError";
import { downloadQueue } from "./queue";
import { isConnected } from "./database/database";
import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model";
import { getRepositoryFromGitHub } from "./source/GitHubRepository";
import config from "../config";
import { GitHubRepository } from "./source/GitHubRepository";
import { trace } from "@opentelemetry/api";
import { getToken } from "./GitHubUtils";
function anonymizeTreeRecursive(
tree: TreeElement,
@@ -48,38 +48,55 @@ function anonymizeTreeRecursive(
export default class Repository {
private _model: IAnonymizedRepositoryDocument;
source: Source;
owner: User;
constructor(data: IAnonymizedRepositoryDocument) {
this._model = data;
switch (data.source.type) {
this.owner = new User(new UserModel({ _id: data.owner }));
this.owner = new User(new UserModel({ _id: data.owner }));
this.owner.model.isNew = false;
}
private checkedToken: boolean = false;
private async getToken() {
if (this.checkedToken) return this._model.source.accessToken as string;
const originalToken = this._model.source.accessToken;
const token = await getToken(this);
if (originalToken != token) {
this._model.source.accessToken = token;
await this._model.save();
}
this.checkedToken = true;
return token;
}
get source() {
switch (this.model.source.type) {
case "GitHubDownload":
this.source = new GitHubDownload(data.source, this.repoId);
break;
return new GitHubDownload({
repoId: this.repoId,
commit: this.model.source.commit || "HEAD",
organization: "",
repoName: this.model.source.repositoryName || "",
getToken: () => this.getToken(),
});
case "GitHubStream":
this.source = new GitHubStream(data.source);
break;
return new GitHubStream({
repoId: this.repoId,
commit: this.model.source.commit || "HEAD",
organization: "",
repoName: this.model.source.repositoryName || "",
getToken: () => this.getToken(),
});
case "Zip":
this.source = new Zip(data.source, this.repoId);
break;
return new Zip(this.model.source, this.repoId);
default:
throw new AnonymousError("unsupported_source", {
object: data.source.type,
object: this,
httpStatus: 400,
});
}
this.owner = new User(new UserModel({ _id: data.owner }));
if (this.source instanceof GitHubBase) {
const originalToken = this._model.source.accessToken;
this.source.getToken(this.owner.id).then((token) => {
if (originalToken != token) {
this._model.source.accessToken = token;
this._model.save();
}
});
}
this.owner.model.isNew = false;
}
/**
@@ -194,8 +211,8 @@ export default class Repository {
image: this.options.image,
link: this.options.link,
repoId: this.repoId,
repoName: (this.source as GitHubBase).githubRepository?.fullName,
branchName: (this.source as GitHubBase).branch?.name || "main",
repoName: this.model.source.repositoryName,
branchName: this.model.source.branch || "main",
});
}
@@ -217,16 +234,17 @@ export default class Repository {
) {
// Only GitHubBase sources can be updated for the moment
if (this.source instanceof GitHubBase) {
const token = await this.source.getToken(this.owner.id);
const branches = await this.source.githubRepository.branches({
const token = await this.getToken();
const ghRepo = new GitHubRepository({});
const branches = await ghRepo.branches({
force: true,
accessToken: token,
});
const branch = this.source.branch;
const newCommit = branches.filter((f) => f.name == branch.name)[0]
const branchName = this.model.source.branch || "main";
const newCommit = branches.filter((f) => f.name == branchName)[0]
?.commit;
if (
branch.commit == newCommit &&
this.model.source.commit == newCommit &&
this.status == RepositoryStatus.READY
) {
console.log(`[UPDATE] ${this._model.repoId} is up to date`);
@@ -235,12 +253,9 @@ export default class Repository {
return;
}
this._model.source.commit = newCommit;
const commitInfo = await this.source.githubRepository.getCommitInfo(
newCommit,
{
accessToken: token,
}
);
const commitInfo = await ghRepo.getCommitInfo(newCommit, {
accessToken: token,
});
if (
commitInfo.commit?.author?.date ||
commitInfo.commit?.committer?.date
@@ -249,11 +264,11 @@ export default class Repository {
commitInfo.commit.committer?.date) as string;
this._model.source.commitDate = new Date(d);
}
branch.commit = newCommit;
this.model.source.commit = newCommit;
if (!newCommit) {
console.error(
`${branch.name} for ${this.source.githubRepository.fullName} is not found`
`${branchName} for ${this.model.source.repositoryName} is not found`
);
await this.updateStatus(RepositoryStatus.ERROR, "branch_not_found");
await this.resetSate();
@@ -268,23 +283,6 @@ export default class Repository {
`[UPDATE] ${this._model.repoId} will be updated to ${newCommit}`
);
if (this.source.type == "GitHubDownload") {
const repository = await getRepositoryFromGitHub({
accessToken: await this.source.getToken(this.owner.id),
owner: this.source.githubRepository.owner,
repo: this.source.githubRepository.repo,
});
if (
repository.size === undefined ||
repository.size > config.MAX_REPO_SIZE
) {
console.log(
`[UPDATE] ${this._model.repoId} will be streamed instead of downloaded`
);
this._model.source.type = "GitHubStream";
}
}
await this.resetSate(RepositoryStatus.PREPARING);
await downloadQueue.add(this.repoId, this, {
jobId: this.repoId,
@@ -513,10 +511,15 @@ export default class Repository {
anonymizeDate: this._model.anonymizeDate,
status: this.status,
statusMessage: this._model.statusMessage,
source: this.source.toJSON(),
lastView: this._model.lastView,
pageView: this._model.pageView,
size: this.size,
source: {
fullName: this.model.source.repositoryName,
commit: this.model.source.commit,
branch: this.model.source.branch,
type: this.model.source.type,
},
};
}
}

View File

@@ -6,7 +6,7 @@ import { GitHubRepository } from "./source/GitHubRepository";
import PullRequest from "./PullRequest";
import AnonymizedPullRequestModel from "./database/anonymizedPullRequests/anonymizedPullRequests.model";
import { trace } from "@opentelemetry/api";
import GitHubBase from "./source/GitHubBase";
import { octokit } from "./GitHubUtils";
/**
* Model for a user
@@ -66,9 +66,9 @@ export default class User {
opt?.force === true
) {
// get the list of repo from github
const octokit = GitHubBase.octokit(this.accessToken);
const oct = octokit(this.accessToken);
const repositories = (
await octokit.paginate("GET /user/repos", {
await oct.paginate("GET /user/repos", {
visibility: "all",
sort: "pushed",
per_page: 100,

View File

@@ -1,10 +1,8 @@
import config from "../config";
import GitHubBase from "./source/GitHubBase";
import { isText } from "istextorbinary";
import { basename } from "path";
import { Transform } from "stream";
import { Readable } from "stream";
import AnonymizedFile from "./AnonymizedFile";
import { trace } from "@opentelemetry/api";
const urlRegex =
@@ -33,16 +31,21 @@ export function isTextFile(filePath: string, content?: Buffer) {
}
export class AnonymizeTransformer extends Transform {
public wasAnonimized = false;
public isText: boolean | null = null;
anonimizer: ContentAnonimizer;
constructor(
private readonly opt: {
filePath: string;
} & ConstructorParameters<typeof ContentAnonimizer>[1]
} & ConstructorParameters<typeof ContentAnonimizer>[0]
) {
super();
this.isText = isTextFile(this.opt.filePath);
this.anonimizer = new ContentAnonimizer(this.opt);
}
get wasAnonimized() {
return this.anonimizer.wasAnonymized;
}
_transform(chunk: Buffer, encoding: string, callback: () => void) {
@@ -55,11 +58,9 @@ export class AnonymizeTransformer extends Transform {
}
if (this.isText) {
const anonimizer = new ContentAnonimizer(chunk.toString(), this.opt);
anonimizer.anonymize();
if (anonimizer.wasAnonymized) {
this.wasAnonimized = true;
chunk = Buffer.from(anonimizer.content);
const content = this.anonimizer.anonymize(chunk.toString());
if (this.anonimizer.wasAnonymized) {
chunk = Buffer.from(content);
}
}
@@ -76,25 +77,10 @@ export class AnonymizeTransformer extends Transform {
}
}
interface Anonymizationptions {
repoId?: string;
source?: {};
options: {
terms: string[];
image: boolean;
link: boolean;
pageSource?: {
branch: string;
path: string;
};
};
}
export class ContentAnonimizer {
public wasAnonymized = false;
constructor(
public content: string,
readonly opt: {
image?: boolean;
link?: boolean;
@@ -105,12 +91,12 @@ export class ContentAnonimizer {
}
) {}
private removeImage() {
private removeImage(content: string): string {
if (this.opt.image !== false) {
return;
return content;
}
// remove image in markdown
this.content = this.content.replace(
return content.replace(
/!\[[^\]]*\]\((?<filename>.*?)(?=\"|\))(?<optionalpart>\".*\")?\)/g,
() => {
this.wasAnonymized = true;
@@ -118,20 +104,20 @@ export class ContentAnonimizer {
}
);
}
private removeLink() {
private removeLink(content: string): string {
if (this.opt.link !== false) {
return;
return content;
}
// remove links (URLs)
this.content = this.content.replace(urlRegex, () => {
return content.replace(urlRegex, () => {
this.wasAnonymized = true;
return config.ANONYMIZATION_MASK;
});
}
private replaceGitHubSelfLinks() {
private replaceGitHubSelfLinks(content: string): string {
if (!this.opt.repoName || !this.opt.branchName) {
return;
return content;
}
const repoName = this.opt.repoName;
const branchName = this.opt.branchName;
@@ -140,28 +126,28 @@ export class ContentAnonimizer {
this.wasAnonymized = true;
return `https://${config.APP_HOSTNAME}/r/${this.opt.repoId}`;
};
this.content = this.content.replace(
content = content.replace(
new RegExp(
`https://raw.githubusercontent.com/${repoName}/${branchName}\\b`,
"gi"
),
replaceCallback
);
this.content = this.content.replace(
content = content.replace(
new RegExp(`https://github.com/${repoName}/blob/${branchName}\\b`, "gi"),
replaceCallback
);
this.content = this.content.replace(
content = content.replace(
new RegExp(`https://github.com/${repoName}/tree/${branchName}\\b`, "gi"),
replaceCallback
);
this.content = this.content.replace(
return content.replace(
new RegExp(`https://github.com/${repoName}`, "gi"),
replaceCallback
);
}
private replaceTerms() {
private replaceTerms(content: string): string {
const terms = this.opt.terms || [];
for (let i = 0; i < terms.length; i++) {
let term = terms[i];
@@ -176,7 +162,7 @@ export class ContentAnonimizer {
term = term.replace(/[-[\]{}()*+?.,\\^$|#]/g, "\\$&");
}
// remove whole url if it contains the term
this.content = this.content.replace(urlRegex, (match) => {
content = content.replace(urlRegex, (match) => {
if (new RegExp(`\\b${term}\\b`, "gi").test(match)) {
this.wasAnonymized = true;
return mask;
@@ -185,56 +171,34 @@ export class ContentAnonimizer {
});
// remove the term in the text
this.content = this.content.replace(
new RegExp(`\\b${term}\\b`, "gi"),
() => {
this.wasAnonymized = true;
return mask;
}
);
return content.replace(new RegExp(`\\b${term}\\b`, "gi"), () => {
this.wasAnonymized = true;
return mask;
});
}
return content;
}
anonymize() {
anonymize(content: string) {
const span = trace
.getTracer("ano-file")
.startSpan("ContentAnonimizer.anonymize");
try {
this.removeImage();
content = this.removeImage(content);
span.addEvent("removeImage");
this.removeLink();
content = this.removeLink(content);
span.addEvent("removeLink");
this.replaceGitHubSelfLinks();
content = this.replaceGitHubSelfLinks(content);
span.addEvent("replaceGitHubSelfLinks");
this.replaceTerms();
content = this.replaceTerms(content);
span.addEvent("replaceTerms");
return this.content;
return content;
} finally {
span.end();
}
}
}
export function anonymizeContent(
content: string,
repository: Anonymizationptions
) {
let repoName: string | undefined;
let branchName: string | undefined;
if (repository.source instanceof GitHubBase) {
repoName = repository.source.githubRepository.fullName;
branchName = repository.source.branch.name;
}
return new ContentAnonimizer(content, {
repoId: repository.repoId,
image: repository.options.image,
link: repository.options.link,
terms: repository.options.terms,
repoName,
branchName,
}).anonymize();
}
export function anonymizePath(path: string, terms: string[]) {
return trace
.getTracer("ano-file")

View File

@@ -3,7 +3,6 @@ import Repository from "../Repository";
import { getRepository as getRepositoryImport } from "../database/database";
import { RepositoryStatus } from "../types";
import { trace } from "@opentelemetry/api";
import { Span } from "@opentelemetry/sdk-trace-node";
export default async function (job: SandboxedJob<Repository, void>) {
const {

View File

@@ -9,7 +9,6 @@ import Repository from "../Repository";
import User from "../User";
import { ensureAuthenticated } from "./connection";
import { handleError, getUser, isOwnerOrAdmin, getRepo } from "./route-utils";
import RepositoryModel from "../database/repositories/repositories.model";
const router = express.Router();

View File

@@ -5,9 +5,7 @@ import * as db from "../database/database";
import { getRepo, getUser, handleError, isOwnerOrAdmin } from "./route-utils";
import { getRepositoryFromGitHub } from "../source/GitHubRepository";
import gh = require("parse-github-url");
import GitHubBase from "../source/GitHubBase";
import AnonymizedRepositoryModel from "../database/anonymizedRepositories/anonymizedRepositories.model";
import config from "../../config";
import { IAnonymizedRepositoryDocument } from "../database/anonymizedRepositories/anonymizedRepositories.types";
import Repository from "../Repository";
import UserModel from "../database/users/users.model";
@@ -18,6 +16,7 @@ import RepositoryModel from "../database/repositories/repositories.model";
import User from "../User";
import { RepositoryStatus } from "../types";
import { IUserDocument } from "../database/users/users.types";
import { checkToken } from "../GitHubUtils";
const router = express.Router();
@@ -41,7 +40,7 @@ async function getTokenForAdmin(user: User, req: express.Request) {
});
const user: IUserDocument = existingRepo?.owner as any;
if (user instanceof UserModel) {
const check = await GitHubBase.checkToken(user.accessTokens.github);
const check = await checkToken(user.accessTokens.github);
if (check) {
return user.accessTokens.github;
}
@@ -100,7 +99,7 @@ router.post("/claim", async (req: express.Request, res: express.Response) => {
}
const dbRepo = await RepositoryModel.findById(
(repoConfig.source as GitHubBase).githubRepository.id
repoConfig.model.source.repositoryId
);
if (!dbRepo || dbRepo.externalId != repo.id) {

View File

@@ -86,9 +86,9 @@ router.get(
if (
repo.status == "expired" &&
repo.options.expirationMode == "redirect" &&
repo.source.url
repo.model.source.repositoryName
) {
redirectURL = repo.source.url;
redirectURL = `https://github.com/${repo.model.source.repositoryName}`;
} else {
if (
repo.status == "expired" ||

View File

@@ -51,9 +51,9 @@ export async function getRepo(
if (
repo.status == "expired" &&
repo.options.expirationMode == "redirect" &&
repo.source.url
repo.model.source.repositoryId
) {
res.redirect(repo.source.url);
res.redirect(`https://github.com/${repo.model.source.repositoryName}`);
return null;
}

View File

@@ -2,7 +2,6 @@ import * as express from "express";
import { getRepo, handleError } from "./route-utils";
import * as path from "path";
import AnonymizedFile from "../AnonymizedFile";
import GitHubDownload from "../source/GitHubDownload";
import AnonymousError from "../AnonymousError";
import { Tree, TreeElement } from "../types";
import * as marked from "marked";
@@ -36,10 +35,7 @@ async function webView(req: express.Request, res: express.Response) {
});
}
if (
repo.options.pageSource?.branch !=
(repo.source as GitHubDownload).branch.name
) {
if (repo.options.pageSource?.branch != repo.model.source.branch) {
throw new AnonymousError("page_not_supported_on_different_branch", {
httpStatus: 400,
object: repo,

View File

@@ -1,39 +1,36 @@
import { Octokit } from "@octokit/rest";
import { trace } from "@opentelemetry/api";
import { Readable } from "stream";
import AnonymizedFile from "../AnonymizedFile";
import { Branch, Tree } from "../types";
import { GitHubRepository } from "./GitHubRepository";
import config from "../../config";
import UserModel from "../database/users/users.model";
import { Tree } from "../types";
export default abstract class GitHubBase {
/**
 * Data handed to a GitHubBase source at construction time.
 */
export interface GitHubBaseData {
/** Provides the GitHub access token, either directly or as a promise. */
getToken: () => string | Promise<string>;
/** Identifier of the anonymized repository. */
repoId: string;
/** NOTE(review): presumably the GitHub owner/organization of the repository; confirm against callers. */
organization: string;
/** NOTE(review): presumably the GitHub repository name; confirm whether it includes the owner prefix. */
repoName: string;
/** Commit reference of the source. */
commit: string;
}
/**
 * Contract for a repository source: exposes its type, the content of a
 * single file, and the tree of all its files.
 */
export interface SourceBase {
/** Name of the source type. */
readonly type: string;
/**
* Retrieve the file content
* @param file the file of the content to retrieve
* @returns a promise of a readable stream with the file content
*/
getFileContent(file: AnonymizedFile): Promise<Readable>;
/**
* Get all the files from a specific source
* @param progress optional callback that receives status strings
* @returns a promise of the file tree
*/
getFiles(progress?: (status: string) => void): Promise<Tree>;
}
export default abstract class GitHubBase implements SourceBase {
abstract type: "GitHubDownload" | "GitHubStream" | "Zip";
githubRepository: GitHubRepository;
branch: Branch;
accessToken: string | undefined;
validToken: boolean = false;
constructor(data: {
accessToken?: string;
commit?: string;
branch?: string;
repositoryId?: string;
repositoryName?: string;
}) {
this.accessToken = data.accessToken;
const branches = [];
if (data.branch && data.commit) {
branches.push({ commit: data.commit, name: data.branch });
}
this.githubRepository = new GitHubRepository({
name: data.repositoryName,
externalId: data.repositoryId,
branches,
});
this.branch = branches[0];
}
constructor(readonly data: GitHubBaseData) {}
abstract getFileContent(
file: AnonymizedFile,
@@ -41,69 +38,4 @@ export default abstract class GitHubBase {
): Promise<Readable>;
abstract getFiles(progress?: (status: string) => void): Promise<Tree>;
static octokit(token: string) {
return new Octokit({
auth: token,
request: {
fetch: fetch,
},
});
}
static async checkToken(token: string) {
const octokit = GitHubBase.octokit(token);
try {
await octokit.users.getAuthenticated();
return true;
} catch (error) {
return false;
}
}
async getToken(ownerID?: any) {
const span = trace.getTracer("ano-file").startSpan("GHBase.getToken");
span.setAttribute("repoId", this.githubRepository.fullName || "");
try {
if (this.validToken) {
return this.accessToken as string;
}
if (ownerID) {
const user = await UserModel.findById(ownerID, {
accessTokens: 1,
});
if (user?.accessTokens.github) {
const check = await GitHubBase.checkToken(user.accessTokens.github);
if (check) {
this.accessToken = user.accessTokens.github;
this.validToken = true;
return this.accessToken;
}
}
}
if (this.accessToken) {
if (await GitHubBase.checkToken(this.accessToken)) {
this.validToken = true;
return this.accessToken;
}
}
this.accessToken = config.GITHUB_TOKEN;
return this.accessToken;
} finally {
span.end();
}
}
get url() {
return "https://github.com/" + this.githubRepository.fullName;
}
toJSON(): any {
return {
type: this.type,
fullName: this.githubRepository.fullName?.toString(),
branch: this.branch?.name,
commit: this.branch?.commit,
};
}
}

View File

@@ -3,56 +3,45 @@ import { Readable } from "stream";
import { OctokitResponse } from "@octokit/types";
import storage from "../storage";
import GitHubBase from "./GitHubBase";
import GitHubBase, { GitHubBaseData } from "./GitHubBase";
import AnonymizedFile from "../AnonymizedFile";
import { SourceBase } from "../types";
import AnonymousError from "../AnonymousError";
import { trace } from "@opentelemetry/api";
import { FILE_TYPE } from "../storage/Storage";
import { octokit } from "../GitHubUtils";
export default class GitHubDownload extends GitHubBase implements SourceBase {
export default class GitHubDownload extends GitHubBase {
type: "GitHubDownload" | "GitHubStream" | "Zip" = "GitHubDownload";
constructor(
data: {
branch?: string;
commit?: string;
repositoryId?: string;
repositoryName?: string;
accessToken?: string;
},
readonly repoId: string
) {
constructor(data: GitHubBaseData) {
super(data);
}
private async _getZipUrl(
auth: string
): Promise<OctokitResponse<unknown, 302>> {
const octokit = GitHubBase.octokit(auth as string);
return octokit.rest.repos.downloadZipballArchive({
owner: this.githubRepository.owner,
repo: this.githubRepository.repo,
ref: this.branch?.commit || "HEAD",
private async _getZipUrl(): Promise<OctokitResponse<unknown, 302>> {
const oct = octokit(await this.data.getToken());
return oct.rest.repos.downloadZipballArchive({
owner: this.data.organization,
repo: this.data.repoName,
ref: this.data.commit || "HEAD",
method: "HEAD",
});
}
async download(token: string, progress?: (status: string) => void) {
async download(progress?: (status: string) => void) {
const span = trace.getTracer("ano-file").startSpan("GHDownload.download");
span.setAttribute("repoId", this.githubRepository.fullName || "");
span.setAttribute("repoId", this.data.repoId);
try {
let response: OctokitResponse<unknown, number>;
try {
response = await this._getZipUrl(token);
response = await this._getZipUrl();
} catch (error) {
span.recordException(error as Error);
throw new AnonymousError("repo_not_accessible", {
httpStatus: 404,
object: this.githubRepository,
object: this.data,
cause: error as Error,
});
}
await storage.mk(this.repoId);
await storage.mk(this.data.repoId);
let downloadProgress: { transferred: number } | undefined = undefined;
let progressTimeout;
let inDownload = true;
@@ -73,18 +62,17 @@ export default class GitHubDownload extends GitHubBase implements SourceBase {
downloadProgress = p;
});
await storage.extractZip(
this.repoId,
this.data.repoId,
"",
downloadStream,
undefined,
this
this.type
);
} catch (error) {
span.recordException(error as Error);
throw new AnonymousError("unable_to_download", {
httpStatus: 500,
cause: error as Error,
object: this.githubRepository,
object: this.data,
});
} finally {
inDownload = false;
@@ -102,11 +90,11 @@ export default class GitHubDownload extends GitHubBase implements SourceBase {
const span = trace
.getTracer("ano-file")
.startSpan("GHDownload.getFileContent");
span.setAttribute("repoId", this.githubRepository.fullName || "");
span.setAttribute("repoId", file.repository.repoId);
try {
const exists = await storage.exists(file.filePath);
if (exists === FILE_TYPE.FILE) {
return storage.read(this.repoId, file.filePath);
return storage.read(this.data.repoId, file.filePath);
} else if (exists === FILE_TYPE.FOLDER) {
throw new AnonymousError("folder_not_supported", {
httpStatus: 400,
@@ -117,20 +105,17 @@ export default class GitHubDownload extends GitHubBase implements SourceBase {
await file.originalPath();
// the cache is not ready, we need to download the repository
await this.download(
await this.getToken(file.repository.owner.id),
progress
);
return storage.read(this.repoId, file.filePath);
await this.download(progress);
return storage.read(this.data.repoId, file.filePath);
} finally {
span.end();
}
}
async getFiles() {
if ((await storage.exists(this.repoId)) === FILE_TYPE.NOT_FOUND) {
await this.download(await this.getToken());
async getFiles(progress?: (status: string) => void) {
if ((await storage.exists(this.data.repoId)) === FILE_TYPE.NOT_FOUND) {
await this.download(progress);
}
return storage.listFiles(this.repoId);
return storage.listFiles(this.data.repoId);
}
}

View File

@@ -6,7 +6,7 @@ import RepositoryModel from "../database/repositories/repositories.model";
import AnonymousError from "../AnonymousError";
import { isConnected } from "../database/database";
import { trace } from "@opentelemetry/api";
import GitHubBase from "./GitHubBase";
import { octokit } from "../GitHubUtils";
export class GitHubRepository {
private _data: Partial<{
@@ -49,7 +49,7 @@ export class GitHubRepository {
async getCommitInfo(
sha: string,
opt: {
accessToken?: string;
accessToken: string;
}
) {
const span = trace
@@ -58,8 +58,8 @@ export class GitHubRepository {
span.setAttribute("owner", this.owner);
span.setAttribute("repo", this.repo);
try {
const octokit = GitHubBase.octokit(opt.accessToken as string);
const commit = await octokit.repos.getCommit({
const oct = octokit(opt.accessToken);
const commit = await oct.repos.getCommit({
owner: this.owner,
repo: this.repo,
ref: sha,
@@ -71,7 +71,7 @@ export class GitHubRepository {
}
async branches(opt: {
accessToken?: string;
accessToken: string;
force?: boolean;
}): Promise<Branch[]> {
const span = trace.getTracer("ano-file").startSpan("GHRepository.branches");
@@ -84,10 +84,10 @@ export class GitHubRepository {
opt?.force === true
) {
// get the list of branches from github
const octokit = GitHubBase.octokit(opt.accessToken as string);
const oct = octokit(opt.accessToken);
try {
const branches = (
await octokit.paginate("GET /repos/{owner}/{repo}/branches", {
await oct.paginate("GET /repos/{owner}/{repo}/branches", {
owner: this.owner,
repo: this.repo,
per_page: 100,
@@ -132,7 +132,7 @@ export class GitHubRepository {
async readme(opt: {
branch?: string;
force?: boolean;
accessToken?: string;
accessToken: string;
}): Promise<string | undefined> {
const span = trace.getTracer("ano-file").startSpan("GHRepository.readme");
span.setAttribute("owner", this.owner);
@@ -154,9 +154,9 @@ export class GitHubRepository {
const selected = model.branches.filter((f) => f.name == opt.branch)[0];
if (selected && (!selected.readme || opt?.force === true)) {
// get the readme from github
const octokit = GitHubBase.octokit(opt.accessToken as string);
const oct = octokit(opt.accessToken);
try {
const ghRes = await octokit.repos.getReadme({
const ghRes = await oct.repos.getReadme({
owner: this.owner,
repo: this.repo,
ref: selected?.commit,
@@ -239,11 +239,11 @@ export async function getRepositoryFromGitHub(opt: {
if (opt.repo.indexOf(".git") > -1) {
opt.repo = opt.repo.replace(".git", "");
}
const octokit = GitHubBase.octokit(opt.accessToken as string);
const oct = octokit(opt.accessToken);
let r: RestEndpointMethodTypes["repos"]["get"]["response"]["data"];
try {
r = (
await octokit.repos.get({
await oct.repos.get({
owner: opt.owner,
repo: opt.repo,
})
@@ -282,7 +282,7 @@ export async function getRepositoryFromGitHub(opt: {
model.defaultBranch = r.default_branch;
model.hasPage = r.has_pages;
if (model.hasPage) {
const ghPageRes = await octokit.repos.getPages({
const ghPageRes = await oct.repos.getPages({
owner: opt.owner,
repo: opt.repo,
});

View File

@@ -1,7 +1,7 @@
import AnonymizedFile from "../AnonymizedFile";
import GitHubBase from "./GitHubBase";
import GitHubBase, { GitHubBaseData } from "./GitHubBase";
import storage from "../storage";
import { SourceBase, Tree } from "../types";
import { Tree } from "../types";
import * as path from "path";
import got from "got";
@@ -10,28 +10,23 @@ import AnonymousError from "../AnonymousError";
import config from "../../config";
import { trace } from "@opentelemetry/api";
import { FILE_TYPE } from "../storage/Storage";
import { octokit } from "../GitHubUtils";
export default class GitHubStream extends GitHubBase implements SourceBase {
export default class GitHubStream extends GitHubBase {
type: "GitHubDownload" | "GitHubStream" | "Zip" = "GitHubStream";
constructor(data: {
branch?: string;
commit?: string;
repositoryId?: string;
repositoryName?: string;
accessToken?: string;
}) {
constructor(data: GitHubBaseData) {
super(data);
}
downloadFile(sha: string, token: string) {
downloadFile(token: string, sha: string) {
const span = trace.getTracer("ano-file").startSpan("GHStream.downloadFile");
span.setAttribute("sha", sha);
const octokit = GitHubBase.octokit(token);
const oct = octokit(token);
try {
const { url } = octokit.rest.git.getBlob.endpoint({
owner: this.githubRepository.owner,
repo: this.githubRepository.repo,
const { url } = oct.rest.git.getBlob.endpoint({
owner: this.data.organization,
repo: this.data.repoName,
file_sha: sha,
});
return got.stream(url, {
@@ -46,7 +41,7 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
// span.recordException(error as Error);
throw new AnonymousError("repo_not_accessible", {
httpStatus: 404,
object: this.githubRepository,
object: this.data,
cause: error as Error,
});
} finally {
@@ -88,8 +83,7 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
});
}
try {
const token = await this.getToken(file.repository.owner.id);
const content = this.downloadFile(file_sha, token);
const content = this.downloadFile(await this.data.getToken(), file_sha);
// duplicate the stream to write it to the storage
const stream1 = content.pipe(new stream.PassThrough());
@@ -98,8 +92,7 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
file.repository.repoId,
file.filePath,
stream1,
file,
this
this.type
);
return stream2;
} catch (error) {
@@ -126,17 +119,15 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
async getFiles() {
const span = trace.getTracer("ano-file").startSpan("GHStream.getFiles");
span.setAttribute("repoName", this.githubRepository.fullName || "");
span.setAttribute("repoId", this.data.repoId);
try {
let commit = this.branch?.commit;
return this.getTree(await this.getToken(), commit);
return this.getTree(this.data.commit);
} finally {
span.end();
}
}
private async getTree(
token: string,
sha: string,
truncatedTree: Tree = {},
parentPath: string = "",
@@ -146,13 +137,12 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
}
) {
const span = trace.getTracer("ano-file").startSpan("GHStream.getTree");
span.setAttribute("repoName", this.githubRepository.fullName || "");
span.setAttribute("sha", sha);
let ghRes: Awaited<ReturnType<typeof this.getGHTree>>;
try {
count.request++;
ghRes = await this.getGHTree(token, sha, { recursive: true });
ghRes = await this.getGHTree(sha, { recursive: true });
} catch (error) {
console.error(error);
span.recordException(error as Error);
@@ -165,8 +155,6 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
httpStatus: (error as any).status,
cause: error as Error,
object: {
owner: this.githubRepository.owner,
repo: this.githubRepository.repo,
tree_sha: sha,
},
});
@@ -178,24 +166,20 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
const tree = this.tree2Tree(ghRes.tree, truncatedTree, parentPath);
count.file += ghRes.tree.length;
if (ghRes.truncated) {
await this.getTruncatedTree(token, sha, tree, parentPath, count);
await this.getTruncatedTree(sha, tree, parentPath, count);
}
span.end();
return tree;
}
private async getGHTree(
token: string,
sha: string,
opt = { recursive: true }
) {
private async getGHTree(sha: string, opt = { recursive: true }) {
const span = trace.getTracer("ano-file").startSpan("GHStream.getGHTree");
span.setAttribute("sha", sha);
try {
const octokit = GitHubBase.octokit(token);
const ghRes = await octokit.git.getTree({
owner: this.githubRepository.owner,
repo: this.githubRepository.repo,
const oct = octokit(await this.data.getToken());
const ghRes = await oct.git.getTree({
owner: this.data.organization,
repo: this.data.repoName,
tree_sha: sha,
recursive: opt.recursive ? "1" : undefined,
});
@@ -206,7 +190,6 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
}
private async getTruncatedTree(
token: string,
sha: string,
truncatedTree: Tree = {},
parentPath: string = "",
@@ -226,7 +209,9 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
let data = null;
try {
data = await this.getGHTree(token, sha, { recursive: false });
data = await this.getGHTree(sha, {
recursive: false,
});
this.tree2Tree(data.tree, truncatedTree, parentPath);
} catch (error) {
span.recordException(error as Error);
@@ -241,7 +226,6 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
const elementPath = path.join(parentPath, file.path);
promises.push(
this.getTruncatedTree(
token,
file.sha,
truncatedTree,
elementPath,
@@ -254,7 +238,9 @@ export default class GitHubStream extends GitHubBase implements SourceBase {
await Promise.all(promises);
} else {
try {
const data = await this.getGHTree(token, sha, { recursive: true });
const data = await this.getGHTree(sha, {
recursive: true,
});
this.tree2Tree(data.tree, truncatedTree, parentPath);
if (data.truncated) {
// TODO: TRUNCATED

View File

@@ -1,7 +1,7 @@
import AnonymizedFile from "../AnonymizedFile";
import storage from "../storage";
import { SourceBase } from "../types";
import * as stream from "stream";
import { SourceBase } from "./GitHubBase";
export default class Zip implements SourceBase {
type = "Zip";

View File

@@ -1,6 +1,5 @@
import { SourceBase, Tree } from "../types";
import { Tree } from "../types";
import config from "../../config";
import { Stream } from "node:stream";
import * as fs from "fs";
import { Extract } from "unzip-stream";
import { join, basename, dirname } from "path";
@@ -8,7 +7,6 @@ import { Response } from "express";
import { Readable, pipeline, Transform } from "stream";
import * as archiver from "archiver";
import { promisify } from "util";
import AnonymizedFile from "../AnonymizedFile";
import { lookup } from "mime-types";
import { trace } from "@opentelemetry/api";
import StorageBase, { FILE_TYPE } from "./Storage";
@@ -78,9 +76,7 @@ export default class FileSystem extends StorageBase {
async write(
repoId: string,
p: string,
data: string | Readable,
file?: AnonymizedFile,
source?: SourceBase
data: string | Readable
): Promise<void> {
const span = trace.getTracer("ano-file").startSpan("fs.write");
const fullPath = join(config.FOLDER, this.repoPath(repoId), p);
@@ -170,13 +166,7 @@ export default class FileSystem extends StorageBase {
}
/** @override */
async extractZip(
repoId: string,
p: string,
data: Readable,
file?: AnonymizedFile,
source?: SourceBase
): Promise<void> {
async extractZip(repoId: string, p: string, data: Readable): Promise<void> {
const pipe = promisify(pipeline);
const fullPath = join(config.FOLDER, this.repoPath(repoId), p);
return pipe(

View File

@@ -14,9 +14,8 @@ import { contentType } from "mime-types";
import * as archiver from "archiver";
import { trace } from "@opentelemetry/api";
import { dirname, basename, join } from "path";
import { SourceBase, Tree, TreeFile } from "../types";
import { Tree, TreeFile } from "../types";
import AnonymousError from "../AnonymousError";
import AnonymizedFile from "../AnonymizedFile";
import StorageBase, { FILE_TYPE } from "./Storage";
export default class S3Storage extends StorageBase {
@@ -205,8 +204,7 @@ export default class S3Storage extends StorageBase {
repoId: string,
path: string,
data: string | Readable,
file?: AnonymizedFile,
source?: SourceBase
source?: string
): Promise<void> {
const span = trace.getTracer("ano-file").startSpan("s3.rm");
span.setAttribute("repoId", repoId);
@@ -221,7 +219,7 @@ export default class S3Storage extends StorageBase {
ContentType: contentType(path).toString(),
};
if (source) {
params.Tagging = `source=${source.type}`;
params.Tagging = `source=${source}`;
}
const parallelUploads3 = new Upload({
@@ -289,8 +287,7 @@ export default class S3Storage extends StorageBase {
repoId: string,
path: string,
data: Readable,
file?: AnonymizedFile,
source?: SourceBase
source?: string
): Promise<void> {
let toS3: ArchiveStreamToS3;
const span = trace.getTracer("ano-file").startSpan("s3.extractZip");
@@ -305,9 +302,9 @@ export default class S3Storage extends StorageBase {
onEntry: (header) => {
header.name = header.name.substring(header.name.indexOf("/") + 1);
if (source) {
header.Tagging = `source=${source.type}`;
header.Tagging = `source=${source}`;
header.Metadata = {
source: source.type,
source: source,
};
}
},

View File

@@ -3,8 +3,7 @@ import { Transform, Readable } from "stream";
import * as archiver from "archiver";
import { Response } from "express";
import AnonymizedFile from "../AnonymizedFile";
import { SourceBase, Tree } from "../types";
import { Tree } from "../types";
export enum FILE_TYPE {
FILE = "file",
@@ -52,8 +51,7 @@ export default abstract class StorageBase {
repoId: string,
path: string,
data: string | Readable,
file?: AnonymizedFile,
source?: SourceBase
source?: string
): Promise<void>;
/**
@@ -73,8 +71,7 @@ export default abstract class StorageBase {
repoId: string,
dir: string,
tar: Readable,
file?: AnonymizedFile,
source?: SourceBase
source?: string
): Promise<void>;
/**

View File

@@ -3,30 +3,6 @@ import GitHubStream from "./source/GitHubStream";
import Zip from "./source/Zip";
import S3Storage from "./storage/S3";
import FileSystem from "./storage/FileSystem";
import AnonymizedFile from "./AnonymizedFile";
import { Readable } from "stream";
export interface SourceBase {
readonly type: string;
/**
* The url of the source
*/
url?: string;
/**
* Retrieve the fie content
* @param file the file of the content to retrieve
*/
getFileContent(file: AnonymizedFile): Promise<Readable>;
/**
* Get all the files from a specific source
*/
getFiles(): Promise<Tree>;
toJSON(): any;
}
export type Source = GitHubDownload | GitHubStream | Zip;