diff --git a/src/AnonymizedFile.ts b/src/AnonymizedFile.ts index efc6e46..70d0b47 100644 --- a/src/AnonymizedFile.ts +++ b/src/AnonymizedFile.ts @@ -38,7 +38,7 @@ export default class AnonymizedFile { repository: Repository; anonymizedPath: string; - sha?: string; + _sha?: string; constructor(data: { repository: Repository; anonymizedPath: string }) { this.repository = data.repository; @@ -50,6 +50,12 @@ export default class AnonymizedFile { this.anonymizedPath = data.anonymizedPath; } + async sha() { + if (this._sha) return this._sha; + await this.originalPath(); + return this._sha; + } + /** * De-anonymize the path * @@ -118,7 +124,7 @@ export default class AnonymizedFile { const file: TreeFile = currentAnonymized as TreeFile; this.fileSize = file.size; - this.sha = file.sha; + this._sha = file.sha; if (isAmbiguous) { // it should never happen @@ -134,16 +140,15 @@ export default class AnonymizedFile { } else { this._originalPath = currentOriginalPath; } - return this._originalPath; } - async extension() { - const filename = basename(await this.originalPath()); + extension() { + const filename = basename(this.anonymizedPath); const extensions = filename.split(".").reverse(); return extensions[0].toLowerCase(); } - async isImage(): Promise { - const extension = await this.extension(); + isImage() { + const extension = this.extension(); return [ "png", "jpg", @@ -160,18 +165,21 @@ export default class AnonymizedFile { "heic", ].includes(extension); } - async isFileSupported() { - const extension = await this.extension(); + isFileSupported() { + const extension = this.extension(); if (!this.repository.options.pdf && extension == "pdf") { return false; } - if (!this.repository.options.image && (await this.isImage())) { + if (!this.repository.options.image && this.isImage()) { return false; } return true; } async content(): Promise { + if (this.anonymizedPath.includes(config.ANONYMIZATION_MASK)) { + await this.originalPath(); + } if (this.fileSize && this.fileSize > config.MAX_FILE_SIZE) { throw new AnonymousError("file_too_big", { object: this, @@ -185,9 +193,8 @@ export default class AnonymizedFile { } async anonymizedContent() { - await this.originalPath(); const rs = await this.content(); - return rs.pipe(anonymizeStream(await this.originalPath(), this.repository)); + return rs.pipe(anonymizeStream(this)); } get originalCachePath() { @@ -196,14 +203,25 @@ export default class AnonymizedFile { object: this, httpStatus: 400, }); + if (!this._originalPath) { + if (this.anonymizedPath.includes(config.ANONYMIZATION_MASK)) { + throw new AnonymousError("path_not_defined", { + object: this, + httpStatus: 400, + }); + } else { + return join(this.repository.originalCachePath, this.anonymizedPath); + } + } + return join(this.repository.originalCachePath, this._originalPath); } async send(res: Response): Promise { const pipe = promisify(pipeline); try { - if (await this.extension()) { - res.contentType(await this.extension()); + if (this.extension()) { + res.contentType(this.extension()); } await pipe(await this.anonymizedContent(), res); } catch (error) { diff --git a/src/Repository.ts b/src/Repository.ts index de4ec4b..cc7f85d 100644 --- a/src/Repository.ts +++ b/src/Repository.ts @@ -16,6 +16,7 @@ import ConferenceModel from "./database/conference/conferences.model"; import AnonymousError from "./AnonymousError"; import { downloadQueue } from "./queue"; import { isConnected } from "./database/database"; +import AnonymizedFile from "./AnonymizedFile"; export default class Repository { private _model: IAnonymizedRepositoryDocument; @@ -48,12 +49,17 @@ export default class Repository { * @param opt force to get an updated list of files * @returns The anonymized file tree */ - async anonymizedFiles(opt?: { - /** Force to refresh the file tree */ - force?: boolean; - /** Include the file sha in the response */ - includeSha: boolean; - }): Promise { + async anonymizedFiles( + opt: { + /** Force to refresh the file tree */ + force?: boolean; + /** Include the file sha in the response */ + includeSha: boolean; + } = { + force: false, + includeSha: false, + } + ): Promise { const terms = this._model.options.terms || []; function anonymizeTreeRecursive(tree: TreeElement): TreeElement { @@ -78,11 +84,11 @@ export default class Repository { * @param opt force to get an updated list of files * @returns The file tree */ - async files(opt?: { force?: boolean }) { + async files(opt: { force?: boolean } = { force: false }): Promise { if ( this._model.originalFiles && Object.keys(this._model.originalFiles).length !== 0 && - !opt?.force + !opt.force ) { return this._model.originalFiles; } @@ -90,9 +96,6 @@ export default class Repository { this._model.originalFiles = files; this._model.size = { storage: 0, file: 0 }; await this.computeSize(); - await this._model.save(); - - this._model.originalFiles = files; return files; } @@ -140,8 +143,13 @@ export default class Repository { zip(): Readable { return storage.archive(this.originalCachePath, { format: "zip", - fileTransformer: (filename) => - anonymizeStream(filename, this) as Transformer, + fileTransformer: (filename: string) => + anonymizeStream( + new AnonymizedFile({ + repository: this, + anonymizedPath: filename, + }) + ) as Transformer, }); } diff --git a/src/User.ts b/src/User.ts index 26cd1a6..cde7e3a 100644 --- a/src/User.ts +++ b/src/User.ts @@ -119,9 +119,14 @@ export default class User { */ async getRepositories() { const repositories = ( - await AnonymizedRepositoryModel.find({ - owner: this.id, - }).exec() + await AnonymizedRepositoryModel.find( + { + owner: this.id, + }, + { + originalFiles: 0, + } + ).exec() ).map((d) => new Repository(d)); const promises = []; for (let repo of repositories) { diff --git a/src/anonymize-utils.ts b/src/anonymize-utils.ts index 06406e0..78a44b1 100644 --- a/src/anonymize-utils.ts +++ b/src/anonymize-utils.ts @@ -5,6 +5,7 @@ import { isText } from "istextorbinary"; import { basename } from "path"; import { Transform } from "stream"; import { Readable } from "stream"; +import AnonymizedFile from "./AnonymizedFile"; const urlRegex = /?/g; @@ -31,7 +32,7 @@ export function isTextFile(filePath: string, content: Buffer) { return isText(filename, content); } -export function anonymizeStream(filename: string, repository: Repository) { +export function anonymizeStream(file: AnonymizedFile) { const ts = new Transform(); var chunks = [], len = 0, @@ -43,8 +44,8 @@ export function anonymizeStream(filename: string, repository: Repository) { if (pos === 1) { let data: any = Buffer.concat(chunks, len); - if (isTextFile(filename, data)) { - data = anonymizeContent(data.toString(), repository); + if (isTextFile(file.anonymizedPath, data)) { + data = anonymizeContent(data.toString(), file.repository); } chunks = []; @@ -60,8 +61,8 @@ export function anonymizeStream(filename: string, repository: Repository) { ts._flush = function _flush(cb) { if (chunks.length) { let data: any = Buffer.concat(chunks, len); - if (isText(filename, data)) { - data = anonymizeContent(data.toString(), repository); + if (isText(file.anonymizedPath, data)) { + data = anonymizeContent(data.toString(), file.repository); } this.push(data); diff --git a/src/database/database.ts b/src/database/database.ts index ff989ac..2edc001 100644 --- a/src/database/database.ts +++ b/src/database/database.ts @@ -21,14 +21,25 @@ export async function connect() { return database; } -export async function getRepository(repoId: string) { +export async function getRepository( + repoId: string, + opts: { + includeFiles: boolean; + } = { + includeFiles: true, + } +) { if (!repoId || repoId == "undefined") { throw new AnonymousError("repo_not_found", { object: repoId, httpStatus: 404, }); } - const data = await AnonymizedRepositoryModel.findOne({ repoId }); + const project: any = {}; + if (!opts.includeFiles) { + project.originalFiles = 0; + } + const data = await AnonymizedRepositoryModel.findOne({ repoId }, project); if (!data) throw new AnonymousError("repo_not_found", { object: repoId, diff --git a/src/routes/file.ts b/src/routes/file.ts index 739ed97..92d68fd 100644 --- a/src/routes/file.ts +++ b/src/routes/file.ts @@ -19,12 +19,12 @@ router.get( try { await repo.countView(); - + const f = new AnonymizedFile({ repository: repo, anonymizedPath, }); - if (!(await f.isFileSupported())) { + if (!f.isFileSupported()) { throw new AnonymousError("file_not_supported", { httpStatus: 403, object: f, @@ -34,7 +34,7 @@ router.get( anonymizedPath.substring(anonymizedPath.lastIndexOf("/") + 1) ); // cache the file for 5min - res.header('Cache-Control', 'max-age=300'); + res.header("Cache-Control", "max-age=300"); await f.send(res); } catch (error) { return handleError(error, res, req); diff --git a/src/routes/repository-private.ts b/src/routes/repository-private.ts index 09047ca..6403960 100644 --- a/src/routes/repository-private.ts +++ b/src/routes/repository-private.ts @@ -109,7 +109,10 @@ router.post( "/:repoId/refresh", async (req: express.Request, res: express.Response) => { try { - const repo = await getRepo(req, res, { nocheck: true }); + const repo = await getRepo(req, res, { + nocheck: true, + includeFiles: false, + }); if (!repo) return; if ( @@ -133,7 +136,10 @@ router.post( router.delete( "/:repoId/", async (req: express.Request, res: express.Response) => { - const repo = await getRepo(req, res, { nocheck: true }); + const repo = await getRepo(req, res, { + nocheck: true, + includeFiles: false, + }); if (!repo) return; // if (repo.status == "removing") return res.json({ status: repo.status }); try { @@ -237,7 +243,10 @@ router.get( // get repository information router.get("/:repoId/", async (req: express.Request, res: express.Response) => { try { - const repo = await getRepo(req, res, { nocheck: true }); + const repo = await getRepo(req, res, { + nocheck: true, + includeFiles: false, + }); if (!repo) return; const user = await getUser(req); @@ -327,7 +336,10 @@ router.post( "/:repoId/", async (req: express.Request, res: express.Response) => { try { - const repo = await getRepo(req, res, { nocheck: true }); + const repo = await getRepo(req, res, { + nocheck: true, + includeFiles: false, + }); if (!repo) return; const user = await getUser(req); diff --git a/src/routes/repository-public.ts b/src/routes/repository-public.ts index f88c0ba..253b375 100644 --- a/src/routes/repository-public.ts +++ b/src/routes/repository-public.ts @@ -76,7 +76,7 @@ router.get( async (req: express.Request, res: express.Response) => { try { res.header("Cache-Control", "no-cache"); - const repo = await getRepo(req, res, { nocheck: true }); + const repo = await getRepo(req, res, { nocheck: true, includeFiles: false }); if (!repo) return; let redirectURL = null; if ( diff --git a/src/routes/route-utils.ts b/src/routes/route-utils.ts index 9c97500..5213b62 100644 --- a/src/routes/route-utils.ts +++ b/src/routes/route-utils.ts @@ -37,11 +37,16 @@ export async function getPullRequest( export async function getRepo( req: express.Request, res: express.Response, - opt?: { nocheck?: boolean } + opt: { nocheck?: boolean; includeFiles?: boolean } = { + nocheck: false, + includeFiles: true, + } ) { try { - const repo = await db.getRepository(req.params.repoId); - if (opt?.nocheck == true) { + const repo = await db.getRepository(req.params.repoId, { + includeFiles: opt.includeFiles, + }); + if (opt.nocheck == true) { } else { // redirect if the repository is expired if ( diff --git a/src/routes/webview.ts b/src/routes/webview.ts index a164da6..dfa9434 100644 --- a/src/routes/webview.ts +++ b/src/routes/webview.ts @@ -78,8 +78,7 @@ async function webView(req: express.Request, res: express.Response) { } let best_match = null; - indexSelector: - for (const p of indexPriority) { + indexSelector: for (const p of indexPriority) { for (let filename in currentAnonymized) { if (filename.toLowerCase() == p) { best_match = filename; @@ -96,13 +95,13 @@ async function webView(req: express.Request, res: express.Response) { } } - if (!(await f.isFileSupported())) { + if (!f.isFileSupported()) { throw new AnonymousError("file_not_supported", { httpStatus: 400, object: f, }); } - if ((await f.extension()) == "md") { + if (f.extension() == "md") { const content = await streamToString(await f.anonymizedContent()); res.contentType("html").send(marked.marked(content)); } else { diff --git a/src/source/GitHubStream.ts b/src/source/GitHubStream.ts index 3447bd7..d079f08 100644 --- a/src/source/GitHubStream.ts +++ b/src/source/GitHubStream.ts @@ -39,7 +39,7 @@ export default class GitHubStream extends GitHubBase implements SourceBase { const ghRes = await octokit.rest.git.getBlob({ owner: this.githubRepository.owner, repo: this.githubRepository.repo, - file_sha: file.sha, + file_sha: await file.sha(), }); if (!ghRes.data.content && ghRes.data.size != 0) { throw new AnonymousError("file_not_accessible", { diff --git a/src/storage/S3.ts b/src/storage/S3.ts index 629ba59..cc050d5 100644 --- a/src/storage/S3.ts +++ b/src/storage/S3.ts @@ -27,6 +27,9 @@ export default class S3Storage implements StorageBase { endpoint: config.S3_ENDPOINT, accessKeyId: config.S3_CLIENT_ID, secretAccessKey: config.S3_CLIENT_SECRET, + httpOptions: { + timeout: 1000 * 60 * 60 * 2, // 2 hour + } }); }