From fa2591fe380d3e7c62cc7f394e6854b07218f9ad Mon Sep 17 00:00:00 2001 From: tdurieux Date: Tue, 2 Apr 2024 13:51:13 +0100 Subject: [PATCH] refactor: uncouple repository class & token --- src/AnonymizedFile.ts | 9 +-- src/AnonymousError.ts | 2 +- src/GitHubUtils.ts | 57 ++++++++++++++ src/PullRequest.ts | 39 +++++----- src/Repository.ts | 115 ++++++++++++++-------------- src/User.ts | 6 +- src/anonymize-utils.ts | 106 +++++++++----------------- src/processes/removeRepository.ts | 1 - src/routes/admin.ts | 1 - src/routes/repository-private.ts | 7 +- src/routes/repository-public.ts | 4 +- src/routes/route-utils.ts | 4 +- src/routes/webview.ts | 6 +- src/source/GitHubBase.ts | 120 +++++++----------------------- src/source/GitHubDownload.ts | 67 +++++++---------- src/source/GitHubRepository.ts | 26 +++---- src/source/GitHubStream.ts | 70 +++++++---------- src/source/Zip.ts | 2 +- src/storage/FileSystem.ts | 16 +--- src/storage/S3.ts | 15 ++-- src/storage/Storage.ts | 9 +-- src/types.ts | 24 ------ 22 files changed, 290 insertions(+), 416 deletions(-) create mode 100644 src/GitHubUtils.ts diff --git a/src/AnonymizedFile.ts b/src/AnonymizedFile.ts index 450f9e6..bbf8a47 100644 --- a/src/AnonymizedFile.ts +++ b/src/AnonymizedFile.ts @@ -4,18 +4,11 @@ import { Readable } from "stream"; import { trace } from "@opentelemetry/api"; import Repository from "./Repository"; import { RepositoryStatus, Tree, TreeElement, TreeFile } from "./types"; -import storage from "./storage"; import config from "../config"; -import { - anonymizePath, - AnonymizeTransformer, - isTextFile, -} from "./anonymize-utils"; +import { anonymizePath, isTextFile } from "./anonymize-utils"; import AnonymousError from "./AnonymousError"; import { handleError } from "./routes/route-utils"; import { lookup } from "mime-types"; -import { FILE_TYPE } from "./storage/Storage"; -import GitHubBase from "./source/GitHubBase"; /** * Represent a file in a anonymized repository diff --git a/src/AnonymousError.ts b/src/AnonymousError.ts index ab9fcb2..68a1c08 100644 --- a/src/AnonymousError.ts +++ b/src/AnonymousError.ts @@ -39,7 +39,7 @@ export default class AnonymousError extends CustomError { } else if (this.value instanceof User) { detail = `${this.value.username}`; } else if (this.value instanceof GitHubBase) { - detail = `${this.value.githubRepository.fullName}`; + detail = `GHDownload ${this.value.data.repoId}`; } out += this.message; if (detail) { diff --git a/src/GitHubUtils.ts b/src/GitHubUtils.ts new file mode 100644 index 0000000..6353007 --- /dev/null +++ b/src/GitHubUtils.ts @@ -0,0 +1,57 @@ +import { trace } from "@opentelemetry/api"; +import Repository from "./Repository"; +import { Octokit } from "@octokit/rest"; +import UserModel from "./database/users/users.model"; +import config from "../config"; + +export function octokit(token: string) { + return new Octokit({ + auth: token, + request: { + fetch: fetch, + }, + }); +} + +export async function checkToken(token: string) { + const oct = octokit(token); + try { + await oct.users.getAuthenticated(); + return true; + } catch (error) { + return false; + } +} + +export async function getToken(repository: Repository) { + const span = trace.getTracer("ano-file").startSpan("GHUtils.getToken"); + span.setAttribute("repoId", repository.repoId); + try { + if (repository.model.source.accessToken) { + if (await checkToken(repository.model.source.accessToken)) { + return repository.model.source.accessToken; + } + } + if (!repository.owner.model.accessTokens?.github) { + const accessTokens = ( + await UserModel.findById(repository.owner.id, { + accessTokens: 1, + }) + )?.accessTokens; + if (accessTokens) { + repository.owner.model.accessTokens = accessTokens; + } + } + if (repository.owner.model.accessTokens?.github) { + const check = await checkToken( + repository.owner.model.accessTokens?.github + ); + if (check) { + return repository.owner.model.accessTokens?.github; + } + } + return config.GITHUB_TOKEN; + } finally { + span.end(); + } +} diff --git a/src/PullRequest.ts b/src/PullRequest.ts index 0bef31e..93eff74 100644 --- a/src/PullRequest.ts +++ b/src/PullRequest.ts @@ -1,6 +1,5 @@ -import { RepositoryStatus, Source, Tree, TreeElement, TreeFile } from "./types"; +import { RepositoryStatus } from "./types"; import User from "./User"; -import { anonymizeContent, anonymizePath } from "./anonymize-utils"; import UserModel from "./database/users/users.model"; import Conference from "./Conference"; import ConferenceModel from "./database/conference/conferences.model"; @@ -8,7 +7,8 @@ import AnonymousError from "./AnonymousError"; import { IAnonymizedPullRequestDocument } from "./database/anonymizedPullRequests/anonymizedPullRequests.types"; import config from "../config"; import got from "got"; -import GitHubBase from "./source/GitHubBase"; +import { octokit } from "./GitHubUtils"; +import { ContentAnonimizer } from "./anonymize-utils"; export default class PullRequest { private _model: IAnonymizedPullRequestDocument; @@ -52,26 +52,23 @@ export default class PullRequest { "[INFO] Downloading pull request", this._model.source.pullRequestId ); - const octokit = GitHubBase.octokit(await this.getToken()); + const oct = octokit(await this.getToken()); const [owner, repo] = this._model.source.repositoryFullName.split("/"); const pull_number = this._model.source.pullRequestId; const [prInfo, comments, diff] = await Promise.all([ - octokit.rest.pulls.get({ + oct.rest.pulls.get({ owner, repo, pull_number, }), - octokit.paginate( - "GET /repos/{owner}/{repo}/issues/{issue_number}/comments", - { - owner: owner, - repo: repo, - issue_number: pull_number, - per_page: 100, - } - ), + oct.paginate("GET /repos/{owner}/{repo}/issues/{issue_number}/comments", { + owner: owner, + repo: repo, + issue_number: pull_number, + per_page: 100, + }), got(`https://github.com/${owner}/${repo}/pull/${pull_number}.diff`), ]); @@ -250,18 +247,22 @@ export default class PullRequest { state: this._model.pullRequest.state, draft: this._model.pullRequest.draft, }; + const anonymizer = new ContentAnonimizer({ + ...this.options, + repoId: this.pullRequestId, + }); if (this.options.title) { - output.title = anonymizeContent(this._model.pullRequest.title, this); + output.title = anonymizer.anonymize(this._model.pullRequest.title); } if (this.options.body) { - output.body = anonymizeContent(this._model.pullRequest.body, this); + output.body = anonymizer.anonymize(this._model.pullRequest.body); } if (this.options.comments) { output.comments = this._model.pullRequest.comments?.map((comment) => { const o: any = {}; - if (this.options.body) o.body = anonymizeContent(comment.body, this); + if (this.options.body) o.body = anonymizer.anonymize(comment.body); if (this.options.username) - o.author = anonymizeContent(comment.author, this); + o.author = anonymizer.anonymize(comment.author); if (this.options.date) { o.updatedDate = comment.updatedDate; o.creationDate = comment.creationDate; @@ -270,7 +271,7 @@ export default class PullRequest { }); } if (this.options.diff) { - output.diff = anonymizeContent(this._model.pullRequest.diff, this); + output.diff = anonymizer.anonymize(this._model.pullRequest.diff); } if (this.options.origin) { output.baseRepositoryFullName = diff --git a/src/Repository.ts b/src/Repository.ts index 87b8fa9..3c39084 100644 --- a/src/Repository.ts +++ b/src/Repository.ts @@ -1,5 +1,5 @@ import storage from "./storage"; -import { RepositoryStatus, Source, Tree, TreeElement, TreeFile } from "./types"; +import { RepositoryStatus, Tree, TreeElement, TreeFile } from "./types"; import { Readable } from "stream"; import User from "./User"; import GitHubStream from "./source/GitHubStream"; @@ -16,9 +16,9 @@ import AnonymousError from "./AnonymousError"; import { downloadQueue } from "./queue"; import { isConnected } from "./database/database"; import AnonymizedRepositoryModel from "./database/anonymizedRepositories/anonymizedRepositories.model"; -import { getRepositoryFromGitHub } from "./source/GitHubRepository"; -import config from "../config"; +import { GitHubRepository } from "./source/GitHubRepository"; import { trace } from "@opentelemetry/api"; +import { getToken } from "./GitHubUtils"; function anonymizeTreeRecursive( tree: TreeElement, @@ -48,38 +48,55 @@ function anonymizeTreeRecursive( export default class Repository { private _model: IAnonymizedRepositoryDocument; - source: Source; owner: User; constructor(data: IAnonymizedRepositoryDocument) { this._model = data; - switch (data.source.type) { + this.owner = new User(new UserModel({ _id: data.owner })); + this.owner = new User(new UserModel({ _id: data.owner })); + this.owner.model.isNew = false; + } + + private checkedToken: boolean = false; + + private async getToken() { + if (this.checkedToken) return this._model.source.accessToken as string; + const originalToken = this._model.source.accessToken; + const token = await getToken(this); + if (originalToken != token) { + this._model.source.accessToken = token; + await this._model.save(); + } + this.checkedToken = true; + return token; + } + + get source() { + switch (this.model.source.type) { case "GitHubDownload": - this.source = new GitHubDownload(data.source, this.repoId); - break; + return new GitHubDownload({ + repoId: this.repoId, + commit: this.model.source.commit || "HEAD", + organization: "", + repoName: this.model.source.repositoryName || "", + getToken: () => this.getToken(), + }); case "GitHubStream": - this.source = new GitHubStream(data.source); - break; + return new GitHubStream({ + repoId: this.repoId, + commit: this.model.source.commit || "HEAD", + organization: "", + repoName: this.model.source.repositoryName || "", + getToken: () => this.getToken(), + }); case "Zip": - this.source = new Zip(data.source, this.repoId); - break; + return new Zip(this.model.source, this.repoId); default: throw new AnonymousError("unsupported_source", { - object: data.source.type, + object: this, httpStatus: 400, }); } - this.owner = new User(new UserModel({ _id: data.owner })); - if (this.source instanceof GitHubBase) { - const originalToken = this._model.source.accessToken; - this.source.getToken(this.owner.id).then((token) => { - if (originalToken != token) { - this._model.source.accessToken = token; - this._model.save(); - } - }); - } - this.owner.model.isNew = false; } /** @@ -194,8 +211,8 @@ export default class Repository { image: this.options.image, link: this.options.link, repoId: this.repoId, - repoName: (this.source as GitHubBase).githubRepository?.fullName, - branchName: (this.source as GitHubBase).branch?.name || "main", + repoName: this.model.source.repositoryName, + branchName: this.model.source.branch || "main", }); } @@ -217,16 +234,17 @@ export default class Repository { ) { // Only GitHubBase can be update for the moment if (this.source instanceof GitHubBase) { - const token = await this.source.getToken(this.owner.id); - const branches = await this.source.githubRepository.branches({ + const token = await this.getToken(); + const ghRepo = new GitHubRepository({}); + const branches = await ghRepo.branches({ force: true, accessToken: token, }); - const branch = this.source.branch; - const newCommit = branches.filter((f) => f.name == branch.name)[0] + const branchName = this.model.source.branch || "main"; + const newCommit = branches.filter((f) => f.name == branchName)[0] ?.commit; if ( - branch.commit == newCommit && + this.model.source.commit == newCommit && this.status == RepositoryStatus.READY ) { console.log(`[UPDATE] ${this._model.repoId} is up to date`); @@ -235,12 +253,9 @@ export default class Repository { return; } this._model.source.commit = newCommit; - const commitInfo = await this.source.githubRepository.getCommitInfo( - newCommit, - { - accessToken: token, - } - ); + const commitInfo = await ghRepo.getCommitInfo(newCommit, { + accessToken: token, + }); if ( commitInfo.commit?.author?.date || commitInfo.commit?.committer?.date @@ -249,11 +264,11 @@ export default class Repository { commitInfo.commit.committer?.date) as string; this._model.source.commitDate = new Date(d); } - branch.commit = newCommit; + this.model.source.commit = newCommit; if (!newCommit) { console.error( - `${branch.name} for ${this.source.githubRepository.fullName} is not found` + `${branchName} for ${this.model.source.repositoryName} is not found` ); await this.updateStatus(RepositoryStatus.ERROR, "branch_not_found"); await this.resetSate(); @@ -268,23 +283,6 @@ export default class Repository { `[UPDATE] ${this._model.repoId} will be updated to ${newCommit}` ); - if (this.source.type == "GitHubDownload") { - const repository = await getRepositoryFromGitHub({ - accessToken: await this.source.getToken(this.owner.id), - owner: this.source.githubRepository.owner, - repo: this.source.githubRepository.repo, - }); - if ( - repository.size === undefined || - repository.size > config.MAX_REPO_SIZE - ) { - console.log( - `[UPDATE] ${this._model.repoId} will be streamed instead of downloaded` - ); - this._model.source.type = "GitHubStream"; - } - } - await this.resetSate(RepositoryStatus.PREPARING); await downloadQueue.add(this.repoId, this, { jobId: this.repoId, @@ -513,10 +511,15 @@ export default class Repository { anonymizeDate: this._model.anonymizeDate, status: this.status, statusMessage: this._model.statusMessage, - source: this.source.toJSON(), lastView: this._model.lastView, pageView: this._model.pageView, size: this.size, + source: { + fullName: this.model.source.repositoryName, + commit: this.model.source.commit, + branch: this.model.source.branch, + type: this.model.source.type, + }, }; } } diff --git a/src/User.ts b/src/User.ts index f0337cc..c93d276 100644 --- a/src/User.ts +++ b/src/User.ts @@ -6,7 +6,7 @@ import { GitHubRepository } from "./source/GitHubRepository"; import PullRequest from "./PullRequest"; import AnonymizedPullRequestModel from "./database/anonymizedPullRequests/anonymizedPullRequests.model"; import { trace } from "@opentelemetry/api"; -import GitHubBase from "./source/GitHubBase"; +import { octokit } from "./GitHubUtils"; /** * Model for a user @@ -66,9 +66,9 @@ export default class User { opt?.force === true ) { // get the list of repo from github - const octokit = GitHubBase.octokit(this.accessToken); + const oct = octokit(this.accessToken); const repositories = ( - await octokit.paginate("GET /user/repos", { + await oct.paginate("GET /user/repos", { visibility: "all", sort: "pushed", per_page: 100, diff --git a/src/anonymize-utils.ts b/src/anonymize-utils.ts index 0345330..b0a4905 100644 --- a/src/anonymize-utils.ts +++ b/src/anonymize-utils.ts @@ -1,10 +1,8 @@ import config from "../config"; -import GitHubBase from "./source/GitHubBase"; import { isText } from "istextorbinary"; import { basename } from "path"; import { Transform } from "stream"; import { Readable } from "stream"; -import AnonymizedFile from "./AnonymizedFile"; import { trace } from "@opentelemetry/api"; const urlRegex = @@ -33,16 +31,21 @@ export function isTextFile(filePath: string, content?: Buffer) { } export class AnonymizeTransformer extends Transform { - public wasAnonimized = false; public isText: boolean | null = null; + anonimizer: ContentAnonimizer; constructor( private readonly opt: { filePath: string; - } & ConstructorParameters[1] + } & ConstructorParameters[0] ) { super(); this.isText = isTextFile(this.opt.filePath); + this.anonimizer = new ContentAnonimizer(this.opt); + } + + get wasAnonimized() { + return this.anonimizer.wasAnonymized; } _transform(chunk: Buffer, encoding: string, callback: () => void) { @@ -55,11 +58,9 @@ export class AnonymizeTransformer extends Transform { } if (this.isText) { - const anonimizer = new ContentAnonimizer(chunk.toString(), this.opt); - anonimizer.anonymize(); - if (anonimizer.wasAnonymized) { - this.wasAnonimized = true; - chunk = Buffer.from(anonimizer.content); + const content = this.anonimizer.anonymize(chunk.toString()); + if (this.anonimizer.wasAnonymized) { + chunk = Buffer.from(content); } } @@ -76,25 +77,10 @@ export class AnonymizeTransformer extends Transform { } } -interface Anonymizationptions { - repoId?: string; - source?: {}; - options: { - terms: string[]; - image: boolean; - link: boolean; - pageSource?: { - branch: string; - path: string; - }; - }; -} - export class ContentAnonimizer { public wasAnonymized = false; constructor( - public content: string, readonly opt: { image?: boolean; link?: boolean; @@ -105,12 +91,12 @@ export class ContentAnonimizer { } ) {} - private removeImage() { + private removeImage(content: string): string { if (this.opt.image !== false) { - return; + return content; } // remove image in markdown - this.content = this.content.replace( + return content.replace( /!\[[^\]]*\]\((?.*?)(?=\"|\))(?\".*\")?\)/g, () => { this.wasAnonymized = true; @@ -118,20 +104,20 @@ export class ContentAnonimizer { } ); } - private removeLink() { + private removeLink(content: string): string { if (this.opt.link !== false) { - return; + return content; } // remove image in markdown - this.content = this.content.replace(urlRegex, () => { + return content.replace(urlRegex, () => { this.wasAnonymized = true; return config.ANONYMIZATION_MASK; }); } - private replaceGitHubSelfLinks() { + private replaceGitHubSelfLinks(content: string): string { if (!this.opt.repoName || !this.opt.branchName) { - return; + return content; } const repoName = this.opt.repoName; const branchName = this.opt.branchName; @@ -140,28 +126,28 @@ export class ContentAnonimizer { this.wasAnonymized = true; return `https://${config.APP_HOSTNAME}/r/${this.opt.repoId}`; }; - this.content = this.content.replace( + content = content.replace( new RegExp( `https://raw.githubusercontent.com/${repoName}/${branchName}\\b`, "gi" ), replaceCallback ); - this.content = this.content.replace( + content = content.replace( new RegExp(`https://github.com/${repoName}/blob/${branchName}\\b`, "gi"), replaceCallback ); - this.content = this.content.replace( + content = content.replace( new RegExp(`https://github.com/${repoName}/tree/${branchName}\\b`, "gi"), replaceCallback ); - this.content = this.content.replace( + return content.replace( new RegExp(`https://github.com/${repoName}`, "gi"), replaceCallback ); } - private replaceTerms() { + private replaceTerms(content: string): string { const terms = this.opt.terms || []; for (let i = 0; i < terms.length; i++) { let term = terms[i]; @@ -176,7 +162,7 @@ export class ContentAnonimizer { term = term.replace(/[-[\]{}()*+?.,\\^$|#]/g, "\\$&"); } // remove whole url if it contains the term - this.content = this.content.replace(urlRegex, (match) => { + content = content.replace(urlRegex, (match) => { if (new RegExp(`\\b${term}\\b`, "gi").test(match)) { this.wasAnonymized = true; return mask; @@ -185,56 +171,34 @@ export class ContentAnonimizer { }); // remove the term in the text - this.content = this.content.replace( - new RegExp(`\\b${term}\\b`, "gi"), - () => { - this.wasAnonymized = true; - return mask; - } - ); + return content.replace(new RegExp(`\\b${term}\\b`, "gi"), () => { + this.wasAnonymized = true; + return mask; + }); } + return content; } - anonymize() { + anonymize(content: string) { const span = trace .getTracer("ano-file") .startSpan("ContentAnonimizer.anonymize"); try { - this.removeImage(); + content = this.removeImage(content); span.addEvent("removeImage"); - this.removeLink(); + content = this.removeLink(content); span.addEvent("removeLink"); - this.replaceGitHubSelfLinks(); + content = this.replaceGitHubSelfLinks(content); span.addEvent("replaceGitHubSelfLinks"); - this.replaceTerms(); + content = this.replaceTerms(content); span.addEvent("replaceTerms"); - return this.content; + return content; } finally { span.end(); } } } -export function anonymizeContent( - content: string, - repository: Anonymizationptions -) { - let repoName: string | undefined; - let branchName: string | undefined; - if (repository.source instanceof GitHubBase) { - repoName = repository.source.githubRepository.fullName; - branchName = repository.source.branch.name; - } - return new ContentAnonimizer(content, { - repoId: repository.repoId, - image: repository.options.image, - link: repository.options.link, - terms: repository.options.terms, - repoName, - branchName, - }).anonymize(); -} - export function anonymizePath(path: string, terms: string[]) { return trace .getTracer("ano-file") diff --git a/src/processes/removeRepository.ts b/src/processes/removeRepository.ts index b3b89a2..7f91697 100644 --- a/src/processes/removeRepository.ts +++ b/src/processes/removeRepository.ts @@ -3,7 +3,6 @@ import Repository from "../Repository"; import { getRepository as getRepositoryImport } from "../database/database"; import { RepositoryStatus } from "../types"; import { trace } from "@opentelemetry/api"; -import { Span } from "@opentelemetry/sdk-trace-node"; export default async function (job: SandboxedJob) { const { diff --git a/src/routes/admin.ts b/src/routes/admin.ts index ab0b188..545fb83 100644 --- a/src/routes/admin.ts +++ b/src/routes/admin.ts @@ -9,7 +9,6 @@ import Repository from "../Repository"; import User from "../User"; import { ensureAuthenticated } from "./connection"; import { handleError, getUser, isOwnerOrAdmin, getRepo } from "./route-utils"; -import RepositoryModel from "../database/repositories/repositories.model"; const router = express.Router(); diff --git a/src/routes/repository-private.ts b/src/routes/repository-private.ts index eef6e44..0de6364 100644 --- a/src/routes/repository-private.ts +++ b/src/routes/repository-private.ts @@ -5,9 +5,7 @@ import * as db from "../database/database"; import { getRepo, getUser, handleError, isOwnerOrAdmin } from "./route-utils"; import { getRepositoryFromGitHub } from "../source/GitHubRepository"; import gh = require("parse-github-url"); -import GitHubBase from "../source/GitHubBase"; import AnonymizedRepositoryModel from "../database/anonymizedRepositories/anonymizedRepositories.model"; -import config from "../../config"; import { IAnonymizedRepositoryDocument } from "../database/anonymizedRepositories/anonymizedRepositories.types"; import Repository from "../Repository"; import UserModel from "../database/users/users.model"; @@ -18,6 +16,7 @@ import RepositoryModel from "../database/repositories/repositories.model"; import User from "../User"; import { RepositoryStatus } from "../types"; import { IUserDocument } from "../database/users/users.types"; +import { checkToken } from "../GitHubUtils"; const router = express.Router(); @@ -41,7 +40,7 @@ async function getTokenForAdmin(user: User, req: express.Request) { }); const user: IUserDocument = existingRepo?.owner as any; if (user instanceof UserModel) { - const check = await GitHubBase.checkToken(user.accessTokens.github); + const check = await checkToken(user.accessTokens.github); if (check) { return user.accessTokens.github; } @@ -100,7 +99,7 @@ router.post("/claim", async (req: express.Request, res: express.Response) => { } const dbRepo = await RepositoryModel.findById( - (repoConfig.source as GitHubBase).githubRepository.id + repoConfig.model.source.repositoryId ); if (!dbRepo || dbRepo.externalId != repo.id) { diff --git a/src/routes/repository-public.ts b/src/routes/repository-public.ts index cbe2e56..2f56c45 100644 --- a/src/routes/repository-public.ts +++ b/src/routes/repository-public.ts @@ -86,9 +86,9 @@ router.get( if ( repo.status == "expired" && repo.options.expirationMode == "redirect" && - repo.source.url + repo.model.source.repositoryName ) { - redirectURL = repo.source.url; + redirectURL = `https://github.com/${repo.model.source.repositoryName}`; } else { if ( repo.status == "expired" || diff --git a/src/routes/route-utils.ts b/src/routes/route-utils.ts index 0abbcf2..4f09122 100644 --- a/src/routes/route-utils.ts +++ b/src/routes/route-utils.ts @@ -51,9 +51,9 @@ export async function getRepo( if ( repo.status == "expired" && repo.options.expirationMode == "redirect" && - repo.source.url + repo.model.source.repositoryId ) { - res.redirect(repo.source.url); + res.redirect(`https://github.com/${repo.model.source.repositoryName}`); return null; } diff --git a/src/routes/webview.ts b/src/routes/webview.ts index c326a2f..fc55996 100644 --- a/src/routes/webview.ts +++ b/src/routes/webview.ts @@ -2,7 +2,6 @@ import * as express from "express"; import { getRepo, handleError } from "./route-utils"; import * as path from "path"; import AnonymizedFile from "../AnonymizedFile"; -import GitHubDownload from "../source/GitHubDownload"; import AnonymousError from "../AnonymousError"; import { Tree, TreeElement } from "../types"; import * as marked from "marked"; @@ -36,10 +35,7 @@ async function webView(req: express.Request, res: express.Response) { }); } - if ( - repo.options.pageSource?.branch != - (repo.source as GitHubDownload).branch.name - ) { + if (repo.options.pageSource?.branch != repo.model.source.branch) { throw new AnonymousError("page_not_supported_on_different_branch", { httpStatus: 400, object: repo, diff --git a/src/source/GitHubBase.ts b/src/source/GitHubBase.ts index abddb7f..6b65ed1 100644 --- a/src/source/GitHubBase.ts +++ b/src/source/GitHubBase.ts @@ -1,39 +1,36 @@ -import { Octokit } from "@octokit/rest"; -import { trace } from "@opentelemetry/api"; import { Readable } from "stream"; import AnonymizedFile from "../AnonymizedFile"; -import { Branch, Tree } from "../types"; -import { GitHubRepository } from "./GitHubRepository"; -import config from "../../config"; -import UserModel from "../database/users/users.model"; +import { Tree } from "../types"; -export default abstract class GitHubBase { +export interface GitHubBaseData { + getToken: () => string | Promise; + repoId: string; + organization: string; + repoName: string; + commit: string; +} + +export interface SourceBase { + readonly type: string; + + /** + * Retrieve the fie content + * @param file the file of the content to retrieve + */ + getFileContent(file: AnonymizedFile): Promise; + + /** + * Get all the files from a specific source + */ + getFiles(progress?: (status: string) => void): Promise; +} + +export default abstract class GitHubBase implements SourceBase { abstract type: "GitHubDownload" | "GitHubStream" | "Zip"; - githubRepository: GitHubRepository; - branch: Branch; accessToken: string | undefined; - validToken: boolean = false; - constructor(data: { - accessToken?: string; - commit?: string; - branch?: string; - repositoryId?: string; - repositoryName?: string; - }) { - this.accessToken = data.accessToken; - const branches = []; - if (data.branch && data.commit) { - branches.push({ commit: data.commit, name: data.branch }); - } - this.githubRepository = new GitHubRepository({ - name: data.repositoryName, - externalId: data.repositoryId, - branches, - }); - this.branch = branches[0]; - } + constructor(readonly data: GitHubBaseData) {} abstract getFileContent( file: AnonymizedFile, @@ -41,69 +38,4 @@ export default abstract class GitHubBase { ): Promise; abstract getFiles(progress?: (status: string) => void): Promise; - - static octokit(token: string) { - return new Octokit({ - auth: token, - request: { - fetch: fetch, - }, - }); - } - - static async checkToken(token: string) { - const octokit = GitHubBase.octokit(token); - try { - await octokit.users.getAuthenticated(); - return true; - } catch (error) { - return false; - } - } - - async getToken(ownerID?: any) { - const span = trace.getTracer("ano-file").startSpan("GHBase.getToken"); - span.setAttribute("repoId", this.githubRepository.fullName || ""); - try { - if (this.validToken) { - return this.accessToken as string; - } - if (ownerID) { - const user = await UserModel.findById(ownerID, { - accessTokens: 1, - }); - if (user?.accessTokens.github) { - const check = await GitHubBase.checkToken(user.accessTokens.github); - if (check) { - this.accessToken = user.accessTokens.github; - this.validToken = true; - return this.accessToken; - } - } - } - if (this.accessToken) { - if (await GitHubBase.checkToken(this.accessToken)) { - this.validToken = true; - return this.accessToken; - } - } - this.accessToken = config.GITHUB_TOKEN; - return this.accessToken; - } finally { - span.end(); - } - } - - get url() { - return "https://github.com/" + this.githubRepository.fullName; - } - - toJSON(): any { - return { - type: this.type, - fullName: this.githubRepository.fullName?.toString(), - branch: this.branch?.name, - commit: this.branch?.commit, - }; - } } diff --git a/src/source/GitHubDownload.ts b/src/source/GitHubDownload.ts index c58a773..b73e1aa 100644 --- a/src/source/GitHubDownload.ts +++ b/src/source/GitHubDownload.ts @@ -3,56 +3,45 @@ import { Readable } from "stream"; import { OctokitResponse } from "@octokit/types"; import storage from "../storage"; -import GitHubBase from "./GitHubBase"; +import GitHubBase, { GitHubBaseData } from "./GitHubBase"; import AnonymizedFile from "../AnonymizedFile"; -import { SourceBase } from "../types"; import AnonymousError from "../AnonymousError"; import { trace } from "@opentelemetry/api"; import { FILE_TYPE } from "../storage/Storage"; +import { octokit } from "../GitHubUtils"; -export default class GitHubDownload extends GitHubBase implements SourceBase { +export default class GitHubDownload extends GitHubBase { type: "GitHubDownload" | "GitHubStream" | "Zip" = "GitHubDownload"; - constructor( - data: { - branch?: string; - commit?: string; - repositoryId?: string; - repositoryName?: string; - accessToken?: string; - }, - readonly repoId: string - ) { + constructor(data: GitHubBaseData) { super(data); } - private async _getZipUrl( - auth: string - ): Promise> { - const octokit = GitHubBase.octokit(auth as string); - return octokit.rest.repos.downloadZipballArchive({ - owner: this.githubRepository.owner, - repo: this.githubRepository.repo, - ref: this.branch?.commit || "HEAD", + private async _getZipUrl(): Promise> { + const oct = octokit(await this.data.getToken()); + return oct.rest.repos.downloadZipballArchive({ + owner: this.data.organization, + repo: this.data.repoName, + ref: this.data.commit || "HEAD", method: "HEAD", }); } - async download(token: string, progress?: (status: string) => void) { + async download(progress?: (status: string) => void) { const span = trace.getTracer("ano-file").startSpan("GHDownload.download"); - span.setAttribute("repoId", this.githubRepository.fullName || ""); + span.setAttribute("repoId", this.data.repoId); try { let response: OctokitResponse; try { - response = await this._getZipUrl(token); + response = await this._getZipUrl(); } catch (error) { span.recordException(error as Error); throw new AnonymousError("repo_not_accessible", { httpStatus: 404, - object: this.githubRepository, + object: this.data, cause: error as Error, }); } - await storage.mk(this.repoId); + await storage.mk(this.data.repoId); let downloadProgress: { transferred: number } | undefined = undefined; let progressTimeout; let inDownload = true; @@ -73,18 +62,17 @@ export default class GitHubDownload extends GitHubBase implements SourceBase { downloadProgress = p; }); await storage.extractZip( - this.repoId, + this.data.repoId, "", downloadStream, - undefined, - this + this.type ); } catch (error) { span.recordException(error as Error); throw new AnonymousError("unable_to_download", { httpStatus: 500, cause: error as Error, - object: this.githubRepository, + object: this.data, }); } finally { inDownload = false; @@ -102,11 +90,11 @@ export default class GitHubDownload extends GitHubBase implements SourceBase { const span = trace .getTracer("ano-file") .startSpan("GHDownload.getFileContent"); - span.setAttribute("repoId", this.githubRepository.fullName || ""); + span.setAttribute("repoId", file.repository.repoId); try { const exists = await storage.exists(file.filePath); if (exists === FILE_TYPE.FILE) { - return storage.read(this.repoId, file.filePath); + return storage.read(this.data.repoId, file.filePath); } else if (exists === FILE_TYPE.FOLDER) { throw new AnonymousError("folder_not_supported", { httpStatus: 400, @@ -117,20 +105,17 @@ export default class GitHubDownload extends GitHubBase implements SourceBase { await file.originalPath(); // the cache is not ready, we need to download the repository - await this.download( - await this.getToken(file.repository.owner.id), - progress - ); - return storage.read(this.repoId, file.filePath); + await this.download(progress); + return storage.read(this.data.repoId, file.filePath); } finally { span.end(); } } - async getFiles() { - if ((await storage.exists(this.repoId)) === FILE_TYPE.NOT_FOUND) { - await this.download(await this.getToken()); + async getFiles(progress?: (status: string) => void) { + if ((await storage.exists(this.data.repoId)) === FILE_TYPE.NOT_FOUND) { + await this.download(progress); } - return storage.listFiles(this.repoId); + return storage.listFiles(this.data.repoId); } } diff --git a/src/source/GitHubRepository.ts b/src/source/GitHubRepository.ts index 33ac84f..6b9ee08 100644 --- a/src/source/GitHubRepository.ts +++ b/src/source/GitHubRepository.ts @@ -6,7 +6,7 @@ import RepositoryModel from "../database/repositories/repositories.model"; import AnonymousError from "../AnonymousError"; import { isConnected } from "../database/database"; import { trace } from "@opentelemetry/api"; -import GitHubBase from "./GitHubBase"; +import { octokit } from "../GitHubUtils"; export class GitHubRepository { private _data: Partial<{ @@ -49,7 +49,7 @@ export class GitHubRepository { async getCommitInfo( sha: string, opt: { - accessToken?: string; + accessToken: string; } ) { const span = trace @@ -58,8 +58,8 @@ export class GitHubRepository { span.setAttribute("owner", this.owner); span.setAttribute("repo", this.repo); try { - const octokit = GitHubBase.octokit(opt.accessToken as string); - const commit = await octokit.repos.getCommit({ + const oct = octokit(opt.accessToken); + const commit = await oct.repos.getCommit({ owner: this.owner, repo: this.repo, ref: sha, @@ -71,7 +71,7 @@ export class GitHubRepository { } async branches(opt: { - accessToken?: string; + accessToken: string; force?: boolean; }): Promise { const span = trace.getTracer("ano-file").startSpan("GHRepository.branches"); @@ -84,10 +84,10 @@ export class GitHubRepository { opt?.force === true ) { // get the list of repo from github - const octokit = GitHubBase.octokit(opt.accessToken as string); + const oct = octokit(opt.accessToken); try { const branches = ( - await octokit.paginate("GET /repos/{owner}/{repo}/branches", { + await oct.paginate("GET /repos/{owner}/{repo}/branches", { owner: this.owner, repo: this.repo, per_page: 100, @@ -132,7 +132,7 @@ export class GitHubRepository { async readme(opt: { branch?: string; force?: boolean; - accessToken?: string; + accessToken: string; }): Promise { const span = trace.getTracer("ano-file").startSpan("GHRepository.readme"); span.setAttribute("owner", this.owner); @@ -154,9 +154,9 @@ export class GitHubRepository { const selected = model.branches.filter((f) => f.name == opt.branch)[0]; if (selected && (!selected.readme || opt?.force === true)) { // get the list of repo from github - const octokit = GitHubBase.octokit(opt.accessToken as string); + const oct = octokit(opt.accessToken); try { - const ghRes = await octokit.repos.getReadme({ + const ghRes = await oct.repos.getReadme({ owner: this.owner, repo: this.repo, ref: selected?.commit, @@ -239,11 +239,11 @@ export async function getRepositoryFromGitHub(opt: { if (opt.repo.indexOf(".git") > -1) { opt.repo = opt.repo.replace(".git", ""); } - const octokit = GitHubBase.octokit(opt.accessToken as string); + const oct = octokit(opt.accessToken); let r: RestEndpointMethodTypes["repos"]["get"]["response"]["data"]; try { r = ( - await octokit.repos.get({ + await oct.repos.get({ owner: opt.owner, repo: opt.repo, }) @@ -282,7 +282,7 @@ export async function getRepositoryFromGitHub(opt: { model.defaultBranch = r.default_branch; model.hasPage = r.has_pages; if (model.hasPage) { - const ghPageRes = await octokit.repos.getPages({ + const ghPageRes = await oct.repos.getPages({ owner: opt.owner, repo: opt.repo, }); diff --git a/src/source/GitHubStream.ts b/src/source/GitHubStream.ts index b44c420..361af29 100644 --- a/src/source/GitHubStream.ts +++ b/src/source/GitHubStream.ts @@ -1,7 +1,7 @@ import AnonymizedFile from "../AnonymizedFile"; -import GitHubBase from "./GitHubBase"; +import GitHubBase, { GitHubBaseData } from "./GitHubBase"; import storage from "../storage"; -import { SourceBase, Tree } from "../types"; +import { Tree } from "../types"; import * as path from "path"; import got from "got"; @@ -10,28 +10,23 @@ import AnonymousError from "../AnonymousError"; import config from "../../config"; import { trace } from "@opentelemetry/api"; import { FILE_TYPE } from "../storage/Storage"; +import { octokit } from "../GitHubUtils"; -export default class GitHubStream extends GitHubBase implements SourceBase { +export default class GitHubStream extends GitHubBase { type: "GitHubDownload" | "GitHubStream" | "Zip" = "GitHubStream"; - constructor(data: { - branch?: string; - commit?: string; - repositoryId?: string; - repositoryName?: string; - accessToken?: string; - }) { + constructor(data: GitHubBaseData) { super(data); } - downloadFile(sha: string, token: string) { + downloadFile(token: string, sha: string) { const span = trace.getTracer("ano-file").startSpan("GHStream.downloadFile"); span.setAttribute("sha", sha); - const octokit = GitHubBase.octokit(token); + const oct = octokit(token); try { - const { url } = octokit.rest.git.getBlob.endpoint({ - owner: this.githubRepository.owner, - repo: this.githubRepository.repo, + const { url } = oct.rest.git.getBlob.endpoint({ + owner: this.data.organization, + repo: this.data.repoName, file_sha: sha, }); return got.stream(url, { @@ -46,7 +41,7 @@ export default class GitHubStream extends GitHubBase implements SourceBase { // span.recordException(error as Error); throw new AnonymousError("repo_not_accessible", { httpStatus: 404, - object: this.githubRepository, + object: this.data, cause: error as Error, }); } finally { @@ -88,8 +83,7 @@ export default class GitHubStream extends GitHubBase implements SourceBase { }); } try { - const token = await this.getToken(file.repository.owner.id); - const content = this.downloadFile(file_sha, token); + const content = this.downloadFile(await this.data.getToken(), file_sha); // duplicate the stream to write it to the storage const stream1 = content.pipe(new stream.PassThrough()); @@ -98,8 +92,7 @@ export default class GitHubStream extends GitHubBase implements SourceBase { file.repository.repoId, file.filePath, stream1, - file, - this + this.type ); return stream2; } catch (error) { @@ -126,17 +119,15 @@ export default class GitHubStream extends GitHubBase implements SourceBase { async getFiles() { const span = trace.getTracer("ano-file").startSpan("GHStream.getFiles"); - span.setAttribute("repoName", this.githubRepository.fullName || ""); + span.setAttribute("repoId", this.data.repoId); try { - let commit = this.branch?.commit; - return this.getTree(await this.getToken(), commit); + return this.getTree(this.data.commit); } finally { span.end(); } } private async getTree( - token: string, sha: string, truncatedTree: Tree = {}, parentPath: string = "", @@ -146,13 +137,12 @@ export default class GitHubStream extends GitHubBase implements SourceBase { } ) { const span = trace.getTracer("ano-file").startSpan("GHStream.getTree"); - span.setAttribute("repoName", this.githubRepository.fullName || ""); span.setAttribute("sha", sha); let ghRes: Awaited>; try { count.request++; - ghRes = await this.getGHTree(token, sha, { recursive: true }); + ghRes = await this.getGHTree(sha, { recursive: true }); } catch (error) { console.error(error); span.recordException(error as Error); @@ -165,8 +155,6 @@ export default class GitHubStream extends GitHubBase implements SourceBase { httpStatus: (error as any).status, cause: error as Error, object: { - owner: this.githubRepository.owner, - repo: this.githubRepository.repo, tree_sha: sha, }, }); @@ -178,24 +166,20 @@ export default class GitHubStream extends GitHubBase implements SourceBase { const tree = this.tree2Tree(ghRes.tree, truncatedTree, parentPath); count.file += ghRes.tree.length; if (ghRes.truncated) { - await this.getTruncatedTree(token, sha, tree, parentPath, count); + await this.getTruncatedTree(sha, tree, parentPath, count); } span.end(); return tree; } - private async getGHTree( - token: string, - sha: string, - opt = { recursive: true } - ) { + private async getGHTree(sha: string, opt = { recursive: true }) { const span = trace.getTracer("ano-file").startSpan("GHStream.getGHTree"); span.setAttribute("sha", sha); try { - const octokit = GitHubBase.octokit(token); - const ghRes = await octokit.git.getTree({ - owner: this.githubRepository.owner, - repo: this.githubRepository.repo, + const oct = octokit(await this.data.getToken()); + const ghRes = await oct.git.getTree({ + owner: this.data.organization, + repo: this.data.repoName, tree_sha: sha, recursive: opt.recursive ? "1" : undefined, }); @@ -206,7 +190,6 @@ export default class GitHubStream extends GitHubBase implements SourceBase { } private async getTruncatedTree( - token: string, sha: string, truncatedTree: Tree = {}, parentPath: string = "", @@ -226,7 +209,9 @@ export default class GitHubStream extends GitHubBase implements SourceBase { let data = null; try { - data = await this.getGHTree(token, sha, { recursive: false }); + data = await this.getGHTree(sha, { + recursive: false, + }); this.tree2Tree(data.tree, truncatedTree, parentPath); } catch (error) { span.recordException(error as Error); @@ -241,7 +226,6 @@ export default class GitHubStream extends GitHubBase implements SourceBase { const elementPath = path.join(parentPath, file.path); promises.push( this.getTruncatedTree( - token, file.sha, truncatedTree, elementPath, @@ -254,7 +238,9 @@ export default class GitHubStream extends GitHubBase implements SourceBase { await Promise.all(promises); } else { try { - const data = await this.getGHTree(token, sha, { recursive: true }); + const data = await this.getGHTree(sha, { + recursive: true, + }); this.tree2Tree(data.tree, truncatedTree, parentPath); if (data.truncated) { // TODO: TRUNCATED diff --git a/src/source/Zip.ts b/src/source/Zip.ts index 31b50a2..bead36f 100644 --- a/src/source/Zip.ts +++ b/src/source/Zip.ts @@ -1,7 +1,7 @@ import AnonymizedFile from "../AnonymizedFile"; import storage from "../storage"; -import { SourceBase } from "../types"; import * as stream from "stream"; +import { SourceBase } from "./GitHubBase"; export default class Zip implements SourceBase { type = "Zip"; diff --git a/src/storage/FileSystem.ts b/src/storage/FileSystem.ts index a88cf0f..ccd8a3b 100644 --- a/src/storage/FileSystem.ts +++ b/src/storage/FileSystem.ts @@ -1,6 +1,5 @@ -import { SourceBase, Tree } from "../types"; +import { Tree } from "../types"; import config from "../../config"; -import { Stream } from "node:stream"; import * as fs from "fs"; import { Extract } from "unzip-stream"; import { join, basename, dirname } from "path"; @@ -8,7 +7,6 @@ import { Response } from "express"; import { Readable, pipeline, Transform } from "stream"; import * as archiver from "archiver"; import { promisify } from "util"; -import AnonymizedFile from "../AnonymizedFile"; import { lookup } from "mime-types"; import { trace } from "@opentelemetry/api"; import StorageBase, { FILE_TYPE } from "./Storage"; @@ -78,9 +76,7 @@ export default class FileSystem extends StorageBase { async write( repoId: string, p: string, - data: string | Readable, - file?: AnonymizedFile, - source?: SourceBase + data: string | Readable ): Promise { const span = trace.getTracer("ano-file").startSpan("fs.write"); const fullPath = join(config.FOLDER, this.repoPath(repoId), p); @@ -170,13 +166,7 @@ export default class FileSystem extends StorageBase { } /** @override */ - async extractZip( - repoId: string, - p: string, - data: Readable, - file?: AnonymizedFile, - source?: SourceBase - ): Promise { + async extractZip(repoId: string, p: string, data: Readable): Promise { const pipe = promisify(pipeline); const fullPath = join(config.FOLDER, this.repoPath(repoId), p); return pipe( diff --git a/src/storage/S3.ts b/src/storage/S3.ts index a1891d4..4d6d743 100644 --- a/src/storage/S3.ts +++ b/src/storage/S3.ts @@ -14,9 +14,8 @@ import { contentType } from "mime-types"; import * as archiver from "archiver"; import { trace } from "@opentelemetry/api"; import { dirname, basename, join } from "path"; -import { SourceBase, Tree, TreeFile } from "../types"; +import { Tree, TreeFile } from "../types"; import AnonymousError from "../AnonymousError"; -import AnonymizedFile from "../AnonymizedFile"; import StorageBase, { FILE_TYPE } from "./Storage"; export default class S3Storage extends StorageBase { @@ -205,8 +204,7 @@ export default class S3Storage extends StorageBase { repoId: string, path: string, data: string | Readable, - file?: AnonymizedFile, - source?: SourceBase + source?: string ): Promise { const span = trace.getTracer("ano-file").startSpan("s3.rm"); span.setAttribute("repoId", repoId); @@ -221,7 +219,7 @@ export default class S3Storage extends StorageBase { ContentType: contentType(path).toString(), }; if (source) { - params.Tagging = `source=${source.type}`; + params.Tagging = `source=${source}`; } const parallelUploads3 = new Upload({ @@ -289,8 +287,7 @@ export default class S3Storage extends StorageBase { repoId: string, path: string, data: Readable, - file?: AnonymizedFile, - source?: SourceBase + source?: string ): Promise { let toS3: ArchiveStreamToS3; const span = trace.getTracer("ano-file").startSpan("s3.extractZip"); @@ -305,9 +302,9 @@ export default class S3Storage extends StorageBase { onEntry: (header) => { header.name = header.name.substring(header.name.indexOf("/") + 1); if (source) { - header.Tagging = `source=${source.type}`; + header.Tagging = `source=${source}`; header.Metadata = { - source: source.type, + source: source, }; } }, diff --git a/src/storage/Storage.ts b/src/storage/Storage.ts index 315613a..e80b761 100644 --- a/src/storage/Storage.ts +++ b/src/storage/Storage.ts @@ -3,8 +3,7 @@ import { Transform, Readable } from "stream"; import * as archiver from "archiver"; import { Response } from "express"; -import AnonymizedFile from "../AnonymizedFile"; -import { SourceBase, Tree } from "../types"; +import { Tree } from "../types"; export enum FILE_TYPE { FILE = "file", @@ -52,8 +51,7 @@ export default abstract class StorageBase { repoId: string, path: string, data: string | Readable, - file?: AnonymizedFile, - source?: SourceBase + source?: string ): Promise; /** @@ -73,8 +71,7 @@ export default abstract class StorageBase { repoId: string, dir: string, tar: Readable, - file?: AnonymizedFile, - source?: SourceBase + source?: string ): Promise; /** diff --git a/src/types.ts b/src/types.ts index bcebd8c..402aa88 100644 --- a/src/types.ts +++ b/src/types.ts @@ -3,30 +3,6 @@ import GitHubStream from "./source/GitHubStream"; import Zip from "./source/Zip"; import S3Storage from "./storage/S3"; import FileSystem from "./storage/FileSystem"; -import AnonymizedFile from "./AnonymizedFile"; -import { Readable } from "stream"; - -export interface SourceBase { - readonly type: string; - - /** - * The url of the source - */ - url?: string; - - /** - * Retrieve the fie content - * @param file the file of the content to retrieve - */ - getFileContent(file: AnonymizedFile): Promise; - - /** - * Get all the files from a specific source - */ - getFiles(): Promise; - - toJSON(): any; -} export type Source = GitHubDownload | GitHubStream | Zip;