From 47f44fe41e79bda91219d619be21c5ecee8195a0 Mon Sep 17 00:00:00 2001 From: tdurieux Date: Fri, 13 Aug 2021 00:03:28 +0200 Subject: [PATCH] refactor AnonymizedFile --- src/AnonymizedFile.ts | 235 +++++++++++++++++++++-------------------- src/Repository.ts | 27 ++--- src/anonymize-utils.ts | 18 ++-- src/routes/file.ts | 3 +- src/routes/webview.ts | 3 +- src/server.ts | 2 +- 6 files changed, 155 insertions(+), 133 deletions(-) diff --git a/src/AnonymizedFile.ts b/src/AnonymizedFile.ts index f48f4a9..cc9f6b0 100644 --- a/src/AnonymizedFile.ts +++ b/src/AnonymizedFile.ts @@ -2,71 +2,127 @@ import * as path from "path"; import * as express from "express"; import * as stream from "stream"; import Repository from "./Repository"; -import { Tree, TreeFile } from "./types"; +import { Tree, TreeElement, TreeFile } from "./types"; import storage from "./storage"; import config from "../config"; -import { anonymizeStream } from "./anonymize-utils"; +import { anonymizePath, anonymizeStream } from "./anonymize-utils"; + +function tree2sha( + tree: any, + output: { [key: string]: string } = {}, + parent: string = "" +): { [key: string]: string } { + for (let i in tree) { + const sha = tree[i].sha as string; + const size = tree[i].size as number; + if (sha != null && size != null) { + output[sha] = path.join(parent, i); + } else if (tree[i].child) { + tree2sha(tree[i].child as Tree, output, path.join(parent, i)); + } else { + tree2sha(tree[i] as Tree, output, path.join(parent, i)); + } + } + return output; +} /** * Represent a file in a anonymized repository */ export default class AnonymizedFile { - repository: Repository; - sha?: string; - size?: number; - path?: string; - anonymizedPath: string; + private _originalPath: string; + private fileSize?: number; - constructor( - repository: Repository, - data: { - path?: string; - anonymizedPath: string; - sha?: string; - size?: number; - } - ) { - this.repository = repository; + repository: Repository; + anonymizedPath: string; + sha?: string; + + constructor(data: { repository: Repository; anonymizedPath: string }) { + this.repository = data.repository; if (!this.repository.options.terms) throw new Error("terms_not_specified"); this.anonymizedPath = data.anonymizedPath; - if (data.path) { - this.path = data.path; - } - - if (!data.anonymizedPath && this.path) { - // anonymize the path - this.anonymizedPath = this.path; - for (let term of this.repository.options.terms) { - if (term.trim() == "") { - continue; - } - this.anonymizedPath = this.anonymizedPath.replace( - new RegExp(term, "gi"), - config.ANONYMIZATION_MASK - ); - } - } - if (!this.sha) this.sha = data.sha; - if (!this.size) this.size = data.size; } - async send(res: express.Response): Promise { - try { - const s = await this.anonymizedContent(); - s.on("error", (err) => { - console.log(err); - res.status(500).send({ error: err.message }); - }); - s.pipe(res); - } catch (error) { - console.log("Error during anonymization", error); - res.status(500).send({ error: error.message }); + /** + * De-anonymize the path + * + * @returns the origin relative path of the file + */ + async originalPath(): Promise { + // console.log(new Error().stack); + if (this._originalPath) return this._originalPath; + if (!this.anonymizedPath) throw new Error("path_not_specified"); + + const paths = this.anonymizedPath.trim().split("/"); + + let currentAnonymized: TreeElement = + await this.repository.anonymizedFiles(); + let currentOriginal: TreeElement = await this.repository.files(); + let currentOriginalPath = ""; + let isAmbiguous = false; + for (let i = 0; i < paths.length; i++) { + const fileName = paths[i]; + if (fileName == "") { + continue; + } + if (!currentAnonymized[fileName]) { + throw new Error("file_not_found"); + } + currentAnonymized = currentAnonymized[fileName]; + + if (!isAmbiguous && !currentOriginal[fileName]) { + // anonymize all the file in the folder and check if there is one that match the current filename + const options = []; + for (let originalFileName in currentOriginal) { + if ( + anonymizePath(originalFileName, this.repository.options.terms) == + fileName + ) { + options.push(originalFileName); + } + } + + // if only one option we found the original filename + if (options.length == 1) { + currentOriginalPath = path.join(currentOriginalPath, options[0]); + currentOriginal = currentOriginal[options[0]]; + } else { + isAmbiguous = true; + } + } else if (!isAmbiguous) { + currentOriginalPath = path.join(currentOriginalPath, fileName); + currentOriginal = currentOriginal[fileName]; + } } + + if ( + currentAnonymized.sha === undefined || + currentAnonymized.size === undefined + ) { + throw new Error("folder_not_supported"); + } + + const file: TreeFile = currentAnonymized as TreeFile; + this.fileSize = file.size; + this.sha = file.sha; + + if (isAmbiguous) { + // it should never happen + const shaTree = tree2sha(currentOriginal); + if (!currentAnonymized.sha || !shaTree[file.sha]) { + throw new Error("file_not_found"); + } + + this._originalPath = path.join(currentOriginalPath, shaTree[file.sha]); + } else { + this._originalPath = currentOriginalPath; + } + + return this._originalPath; } async isFileSupported() { - this.path = await this.getOriginalPath(); - const filename = path.basename(this.path); + const filename = path.basename(await this.originalPath()); const extensions = filename.split(".").reverse(); const extension = extensions[0].toLowerCase(); if (!this.repository.options.pdf && extension == "pdf") { @@ -85,16 +141,8 @@ export default class AnonymizedFile { return true; } - get originalCachePath() { - if (!this.path) throw "path_not_defined"; - return path.join( - this.repository.originalCachePath, - this.path - ); - } - async content(): Promise { - if (this.size && this.size > config.MAX_FILE_SIZE) { + if (this.fileSize && this.fileSize > config.MAX_FILE_SIZE) { throw new Error("file_too_big"); } if (await storage.exists(this.originalCachePath)) { @@ -105,64 +153,27 @@ export default class AnonymizedFile { } async anonymizedContent() { - await this.getOriginalPath(); - if (!this.path) throw new Error("path_not_specified"); - if (!this.repository.options.terms) throw new Error("terms_not_specified"); + await this.originalPath(); const rs = await this.content(); - const contentStream = rs.pipe(anonymizeStream(this.path, this.repository)); - return contentStream; + return rs.pipe(anonymizeStream(await this.originalPath(), this.repository)); } - /** - * De-anonymize the path - * - * @returns the origin relative path of the file - */ - async getOriginalPath(): Promise { - if (!this.anonymizedPath) throw new Error("path_not_specified"); + get originalCachePath() { + if (!this.originalPath) throw new Error("path_not_defined"); + return path.join(this.repository.originalCachePath, this._originalPath); + } - const files = await this.repository.files(); - const paths = this.anonymizedPath.trim().split("/"); - - let current: any = await this.repository.anonymizedFiles(); - for (let i = 0; i < paths.length; i++) { - const fileName = paths[i]; - if (fileName == "") { - continue; - } - if (current[fileName]) { - current = current[fileName]; - } else { - throw new Error("file_not_found"); - } + async send(res: express.Response): Promise { + try { + const s = await this.anonymizedContent(); + s.on("error", (err) => { + console.log(err); + res.status(500).send({ error: err.message }); + }); + s.pipe(res); + } catch (error) { + console.log("Error during anonymization", error); + res.status(500).send({ error: error.message }); } - - function tree2sha( - tree: any, - output: { [key: string]: string } = {}, - parent: string = "" - ): { [key: string]: string } { - for (let i in tree) { - const sha = tree[i].sha as string; - const size = tree[i].size as number; - if (sha != null && size != null) { - output[sha] = path.join(parent, i); - } else if (tree[i].child) { - tree2sha(tree[i].child as Tree, output, path.join(parent, i)); - } else { - tree2sha(tree[i] as Tree, output, path.join(parent, i)); - } - } - return output; - } - - const shaTree = tree2sha(files); - if (!current.sha || !shaTree[current.sha]) { - throw new Error("file_not_found"); - } - this.path = shaTree[current.sha]; - this.sha = current.sha; - if ((current as TreeFile).size) this.size = (current as TreeFile).size; - return this.path; } } diff --git a/src/Repository.ts b/src/Repository.ts index f2a49fd..514acd6 100644 --- a/src/Repository.ts +++ b/src/Repository.ts @@ -1,6 +1,6 @@ import * as path from "path"; import storage from "./storage"; -import { RepositoryStatus, Source, Tree } from "./types"; +import { RepositoryStatus, Source, Tree, TreeElement, TreeFile } from "./types"; import * as stream from "stream"; import User from "./User"; import GitHubStream from "./source/GitHubStream"; @@ -43,23 +43,23 @@ export default class Repository { async anonymizedFiles(opt?: { force?: boolean }): Promise { const terms = this._model.options.terms || []; - function anonymizeTreeRecursive(tree: Tree): any { - if (Number.isInteger(tree.size)) { - return tree; + function anonymizeTreeRecursive(tree: TreeElement): TreeElement { + if (Number.isInteger(tree.size) && tree.sha !== undefined) { + return tree as TreeFile; } - const output: any = {}; - let current: any = tree; - if (current.child) { - current = current.child; - } - for (const file in current) { + const output: Tree = {}; + for (const file in tree) { const anonymizedPath = anonymizePath(file, terms); - output[anonymizedPath] = anonymizeTreeRecursive(current[file]); + if (output[anonymizedPath]) { + // file anonymization conflict + + } + output[anonymizedPath] = anonymizeTreeRecursive(tree[file]); } return output; } - return anonymizeTreeRecursive(await this.files(opt)); + return anonymizeTreeRecursive(await this.files(opt)) as Tree; } /** @@ -85,6 +85,9 @@ export default class Repository { return files; } + /** + * Check the status of the repository + */ check() { if (this._model.options.expirationMode != "never") { if (this._model.options.expirationDate > new Date()) { diff --git a/src/anonymize-utils.ts b/src/anonymize-utils.ts index ba0834d..ec55309 100644 --- a/src/anonymize-utils.ts +++ b/src/anonymize-utils.ts @@ -37,7 +37,7 @@ export function anonymizeStream(filename: string, repository: Repository) { if (isTextFile(filename, data)) { data = anonymizeContent(data.toString(), repository); } - + chunks = []; len = 0; @@ -105,32 +105,38 @@ export function anonymizeContent(content: string, repository: Repository) { ); } - for (let term of repository.options.terms || []) { + const terms = repository.options.terms || []; + for (let i = 0; i < terms.length; i++) { + const term = terms[i]; if (term.trim() == "") { continue; } // remove whole url if it contains the term content = content.replace(urlRegex, (match) => { if (new RegExp(`\\b${term}\\b`, "gi").test(match)) - return config.ANONYMIZATION_MASK; + return config.ANONYMIZATION_MASK + "-" + (i + 1); return match; }); // remove the term in the text content = content.replace( new RegExp(`\\b${term}\\b`, "gi"), - config.ANONYMIZATION_MASK + config.ANONYMIZATION_MASK + "-" + (i + 1) ); } return content; } export function anonymizePath(path: string, terms: string[]) { - for (let term of terms) { + for (let i = 0; i < terms.length; i++) { + const term = terms[i]; if (term.trim() == "") { continue; } - path = path.replace(new RegExp(term, "gi"), config.ANONYMIZATION_MASK); + path = path.replace( + new RegExp(term, "gi"), + config.ANONYMIZATION_MASK + "-" + (i + 1) + ); } return path; } diff --git a/src/routes/file.ts b/src/routes/file.ts index aa36c8c..636cda3 100644 --- a/src/routes/file.ts +++ b/src/routes/file.ts @@ -19,7 +19,8 @@ router.get( try { await repo.countView(); - const f = new AnonymizedFile(repo, { + const f = new AnonymizedFile({ + repository: repo, anonymizedPath, }); if (!(await f.isFileSupported())) { diff --git a/src/routes/webview.ts b/src/routes/webview.ts index b508597..0f3d37a 100644 --- a/src/routes/webview.ts +++ b/src/routes/webview.ts @@ -34,7 +34,8 @@ async function webView(req: express.Request, res: express.Response) { requestPath = path.join(requestPath, "index.html"); } requestPath = requestPath; - const f = new AnonymizedFile(repo, { + const f = new AnonymizedFile({ + repository: repo, anonymizedPath: requestPath, }); if (!(await f.isFileSupported())) { diff --git a/src/server.ts b/src/server.ts index 6443557..11901b1 100644 --- a/src/server.ts +++ b/src/server.ts @@ -56,7 +56,7 @@ export default async function start() { app.use("/github", rate, connection.router); - // app routes + // api routes app.use("/api/user", rate, router.user); app.use("/api/repo", rate, router.repositoryPublic); app.use("/api/repo", rate, router.file);