refactor AnonymizedFile

This commit is contained in:
tdurieux
2021-08-13 00:03:28 +02:00
parent b2426f70b2
commit 47f44fe41e
6 changed files with 155 additions and 133 deletions
+149 -138
View File
@@ -2,140 +2,10 @@ import * as path from "path";
import * as express from "express"; import * as express from "express";
import * as stream from "stream"; import * as stream from "stream";
import Repository from "./Repository"; import Repository from "./Repository";
import { Tree, TreeFile } from "./types"; import { Tree, TreeElement, TreeFile } from "./types";
import storage from "./storage"; import storage from "./storage";
import config from "../config"; import config from "../config";
import { anonymizeStream } from "./anonymize-utils"; import { anonymizePath, anonymizeStream } from "./anonymize-utils";
/**
* Represents a file in an anonymized repository
*/
export default class AnonymizedFile {
repository: Repository;
sha?: string;
size?: number;
path?: string;
anonymizedPath: string;
/**
 * Build the file wrapper from either its anonymized path or its original path.
 *
 * When no anonymized path is supplied but an original path is, the anonymized
 * path is derived by replacing every non-blank term (used as a global,
 * case-insensitive regular expression) with config.ANONYMIZATION_MASK.
 *
 * @param repository repository the file belongs to; its options must define terms
 * @param data known properties of the file
 * @throws Error("terms_not_specified") when the repository options have no terms
 */
constructor(
  repository: Repository,
  data: {
    path?: string;
    anonymizedPath: string;
    sha?: string;
    size?: number;
  }
) {
  this.repository = repository;
  const terms = this.repository.options.terms;
  if (!terms) {
    throw new Error("terms_not_specified");
  }

  this.anonymizedPath = data.anonymizedPath;
  if (data.path) this.path = data.path;

  if (!data.anonymizedPath && this.path) {
    // no anonymized path was given: compute it from the original one
    let masked = this.path;
    for (const term of terms) {
      if (term.trim() != "") {
        masked = masked.replace(
          new RegExp(term, "gi"),
          config.ANONYMIZATION_MASK
        );
      }
    }
    this.anonymizedPath = masked;
  }

  if (!this.sha) {
    this.sha = data.sha;
  }
  if (!this.size) {
    this.size = data.size;
  }
}
/**
 * Stream the anonymized content of the file into an HTTP response.
 *
 * Any failure is logged. If the response has not started yet, a 500 status
 * with a JSON body `{ error }` is sent; if part of the body was already
 * written, the response is simply ended, because a status line cannot be
 * written twice.
 *
 * @param res the express response that receives the content
 */
async send(res: express.Response): Promise<void> {
  // single place that reports a failure without touching sent headers
  const fail = (error: unknown): void => {
    const message = error instanceof Error ? error.message : String(error);
    if (res.headersSent) {
      // the pipe already started the body: close it instead of re-sending headers
      res.end();
      return;
    }
    res.status(500).send({ error: message });
  };

  try {
    const s = await this.anonymizedContent();
    s.on("error", (err) => {
      console.log(err);
      fail(err);
    });
    s.pipe(res);
  } catch (error) {
    console.log("Error during anonymization", error);
    fail(error);
  }
}
/**
 * Tell whether the file can be served with the current repository options.
 *
 * Resolves the original path (and stores it in `this.path`), then looks at
 * the text after the last dot of the file name, lower-cased.
 *
 * @returns false for a pdf when the `pdf` option is off, false for
 * png/ico/jpg/jpeg/gif when the `image` option is off, true otherwise
 */
async isFileSupported() {
  const IMAGE_EXTENSIONS = ["png", "ico", "jpg", "jpeg", "gif"];

  this.path = await this.getOriginalPath();
  const segments = path.basename(this.path).split(".");
  // last segment: the extension, or the whole name when there is no dot
  const ext = segments[segments.length - 1].toLowerCase();

  if (ext == "pdf" && !this.repository.options.pdf) {
    return false;
  }
  if (IMAGE_EXTENSIONS.includes(ext) && !this.repository.options.image) {
    return false;
  }
  return true;
}
/**
 * Location of the original (non anonymized) file inside the repository cache:
 * the repository cache path joined with the original path of the file.
 *
 * @throws Error("path_not_defined") when the original path is not known yet
 */
get originalCachePath() {
  // throw a real Error (not a bare string) so callers can read `.message`
  if (!this.path) throw new Error("path_not_defined");
  return path.join(this.repository.originalCachePath, this.path);
}
/**
 * Open a readable stream on the original content of the file.
 *
 * The cached copy is used when the storage has one; otherwise the content is
 * requested from the repository source.
 *
 * @returns a stream over the original content
 * @throws Error("file_too_big") when the known size exceeds config.MAX_FILE_SIZE
 */
async content(): Promise<stream.Readable> {
  const tooBig = this.size && this.size > config.MAX_FILE_SIZE;
  if (tooBig) {
    throw new Error("file_too_big");
  }

  const isCached = await storage.exists(this.originalCachePath);
  if (isCached) {
    return storage.read(this.originalCachePath);
  }
  // not cached: delegate to the source of the repository
  return await this.repository.source?.getFileContent(this);
}
/**
 * Open the original content and pipe it through the anonymization stream.
 *
 * @returns the stream produced by piping the content into anonymizeStream
 * @throws Error("path_not_specified") when the original path is unknown after resolution
 * @throws Error("terms_not_specified") when the repository options have no terms
 */
async anonymizedContent() {
  await this.getOriginalPath();

  if (!this.path) {
    throw new Error("path_not_specified");
  }
  if (!this.repository.options.terms) {
    throw new Error("terms_not_specified");
  }

  const original = await this.content();
  return original.pipe(anonymizeStream(this.path, this.repository));
}
/**
* De-anonymize the path
*
* @returns the original relative path of the file
*/
async getOriginalPath(): Promise<string> {
if (!this.anonymizedPath) throw new Error("path_not_specified");
const files = await this.repository.files();
const paths = this.anonymizedPath.trim().split("/");
let current: any = await this.repository.anonymizedFiles();
for (let i = 0; i < paths.length; i++) {
const fileName = paths[i];
if (fileName == "") {
continue;
}
if (current[fileName]) {
current = current[fileName];
} else {
throw new Error("file_not_found");
}
}
function tree2sha( function tree2sha(
tree: any, tree: any,
@@ -156,13 +26,154 @@ export default class AnonymizedFile {
return output; return output;
} }
const shaTree = tree2sha(files); /**
if (!current.sha || !shaTree[current.sha]) { * Represent a file in a anonymized repository
*/
export default class AnonymizedFile {
private _originalPath: string;
private fileSize?: number;
repository: Repository;
anonymizedPath: string;
sha?: string;
/**
 * Create the wrapper of a file identified by its anonymized path.
 *
 * @param data the repository that contains the file and the anonymized path of the file
 * @throws Error("terms_not_specified") when the repository options have no terms
 */
constructor(data: { repository: Repository; anonymizedPath: string }) {
  const { repository, anonymizedPath } = data;

  this.repository = repository;
  if (!repository.options.terms) {
    throw new Error("terms_not_specified");
  }
  this.anonymizedPath = anonymizedPath;
}
/**
* De-anonymize the path
*
* @returns the original relative path of the file
*/
async originalPath(): Promise<string> {
// console.log(new Error().stack);
if (this._originalPath) return this._originalPath;
if (!this.anonymizedPath) throw new Error("path_not_specified");
const paths = this.anonymizedPath.trim().split("/");
let currentAnonymized: TreeElement =
await this.repository.anonymizedFiles();
let currentOriginal: TreeElement = await this.repository.files();
let currentOriginalPath = "";
let isAmbiguous = false;
for (let i = 0; i < paths.length; i++) {
const fileName = paths[i];
if (fileName == "") {
continue;
}
if (!currentAnonymized[fileName]) {
throw new Error("file_not_found"); throw new Error("file_not_found");
} }
this.path = shaTree[current.sha]; currentAnonymized = currentAnonymized[fileName];
this.sha = current.sha;
if ((current as TreeFile).size) this.size = (current as TreeFile).size; if (!isAmbiguous && !currentOriginal[fileName]) {
return this.path; // anonymize all the file in the folder and check if there is one that match the current filename
const options = [];
for (let originalFileName in currentOriginal) {
if (
anonymizePath(originalFileName, this.repository.options.terms) ==
fileName
) {
options.push(originalFileName);
}
}
// if only one option we found the original filename
if (options.length == 1) {
currentOriginalPath = path.join(currentOriginalPath, options[0]);
currentOriginal = currentOriginal[options[0]];
} else {
isAmbiguous = true;
}
} else if (!isAmbiguous) {
currentOriginalPath = path.join(currentOriginalPath, fileName);
currentOriginal = currentOriginal[fileName];
}
}
if (
currentAnonymized.sha === undefined ||
currentAnonymized.size === undefined
) {
throw new Error("folder_not_supported");
}
const file: TreeFile = currentAnonymized as TreeFile;
this.fileSize = file.size;
this.sha = file.sha;
if (isAmbiguous) {
// it should never happen
const shaTree = tree2sha(currentOriginal);
if (!currentAnonymized.sha || !shaTree[file.sha]) {
throw new Error("file_not_found");
}
this._originalPath = path.join(currentOriginalPath, shaTree[file.sha]);
} else {
this._originalPath = currentOriginalPath;
}
return this._originalPath;
}
/**
 * Tell whether the file can be served with the current repository options.
 *
 * The decision is based on the text after the last dot of the original file
 * name, lower-cased.
 *
 * @returns false for a pdf when the `pdf` option is off, false for
 * png/ico/jpg/jpeg/gif when the `image` option is off, true otherwise
 */
async isFileSupported() {
  const IMAGE_EXTENSIONS = ["png", "ico", "jpg", "jpeg", "gif"];

  const originalName = path.basename(await this.originalPath());
  const segments = originalName.split(".");
  // last segment: the extension, or the whole name when there is no dot
  const ext = segments[segments.length - 1].toLowerCase();

  if (ext == "pdf" && !this.repository.options.pdf) {
    return false;
  }
  if (IMAGE_EXTENSIONS.includes(ext) && !this.repository.options.image) {
    return false;
  }
  return true;
}
/**
 * Open a readable stream on the original content of the file.
 *
 * The original path is resolved first: that resolution is what fills in the
 * file size, the sha and the original path used below, so the size limit and
 * the cache lookup work even when content() is the first method called.
 * The cached copy is used when the storage has one; otherwise the content is
 * requested from the repository source.
 *
 * @returns a stream over the original content
 * @throws Error("file_too_big") when the file size exceeds config.MAX_FILE_SIZE
 * @throws whatever originalPath() throws when the path cannot be resolved
 */
async content(): Promise<stream.Readable> {
  // populates this.fileSize and the path read by originalCachePath
  await this.originalPath();

  if (this.fileSize && this.fileSize > config.MAX_FILE_SIZE) {
    throw new Error("file_too_big");
  }
  if (await storage.exists(this.originalCachePath)) {
    return storage.read(this.originalCachePath);
  }
  // NOTE(review): resolves to undefined when the repository has no source — confirm callers
  return await this.repository.source?.getFileContent(this);
}
/**
 * Open the original content and pipe it through the anonymization stream.
 *
 * @returns the stream produced by piping the content into anonymizeStream
 */
async anonymizedContent() {
  // resolve the original path before opening the content
  const resolvedPath = await this.originalPath();
  const original = await this.content();
  const anonymizer = anonymizeStream(resolvedPath, this.repository);
  return original.pipe(anonymizer);
}
/**
 * Location of the original (non anonymized) file inside the repository cache:
 * the repository cache path joined with the resolved original path.
 *
 * @throws Error("path_not_defined") when the original path has not been resolved yet
 */
get originalCachePath() {
  // test the cached value; `this.originalPath` is a method and is always truthy
  if (!this._originalPath) throw new Error("path_not_defined");
  return path.join(this.repository.originalCachePath, this._originalPath);
}
/**
 * Stream the anonymized content of the file into an HTTP response.
 *
 * Any failure is logged. If the response has not started yet, a 500 status
 * with a JSON body `{ error }` is sent; if part of the body was already
 * written, the response is simply ended, because a status line cannot be
 * written twice.
 *
 * @param res the express response that receives the content
 */
async send(res: express.Response): Promise<void> {
  // single place that reports a failure without touching sent headers
  const fail = (error: unknown): void => {
    const message = error instanceof Error ? error.message : String(error);
    if (res.headersSent) {
      // the pipe already started the body: close it instead of re-sending headers
      res.end();
      return;
    }
    res.status(500).send({ error: message });
  };

  try {
    const s = await this.anonymizedContent();
    s.on("error", (err) => {
      console.log(err);
      fail(err);
    });
    s.pipe(res);
  } catch (error) {
    console.log("Error during anonymization", error);
    fail(error);
  }
}
} }
+15 -12
View File
@@ -1,6 +1,6 @@
import * as path from "path"; import * as path from "path";
import storage from "./storage"; import storage from "./storage";
import { RepositoryStatus, Source, Tree } from "./types"; import { RepositoryStatus, Source, Tree, TreeElement, TreeFile } from "./types";
import * as stream from "stream"; import * as stream from "stream";
import User from "./User"; import User from "./User";
import GitHubStream from "./source/GitHubStream"; import GitHubStream from "./source/GitHubStream";
@@ -43,23 +43,23 @@ export default class Repository {
async anonymizedFiles(opt?: { force?: boolean }): Promise<Tree> { async anonymizedFiles(opt?: { force?: boolean }): Promise<Tree> {
const terms = this._model.options.terms || []; const terms = this._model.options.terms || [];
function anonymizeTreeRecursive(tree: Tree): any { function anonymizeTreeRecursive(tree: TreeElement): TreeElement {
if (Number.isInteger(tree.size)) { if (Number.isInteger(tree.size) && tree.sha !== undefined) {
return tree; return tree as TreeFile;
} }
const output: any = {}; const output: Tree = {};
let current: any = tree; for (const file in tree) {
if (current.child) {
current = current.child;
}
for (const file in current) {
const anonymizedPath = anonymizePath(file, terms); const anonymizedPath = anonymizePath(file, terms);
output[anonymizedPath] = anonymizeTreeRecursive(current[file]); if (output[anonymizedPath]) {
// file anonymization conflict
}
output[anonymizedPath] = anonymizeTreeRecursive(tree[file]);
} }
return output; return output;
} }
return anonymizeTreeRecursive(await this.files(opt)); return anonymizeTreeRecursive(await this.files(opt)) as Tree;
} }
/** /**
@@ -85,6 +85,9 @@ export default class Repository {
return files; return files;
} }
/**
* Check the status of the repository
*/
check() { check() {
if (this._model.options.expirationMode != "never") { if (this._model.options.expirationMode != "never") {
if (this._model.options.expirationDate > new Date()) { if (this._model.options.expirationDate > new Date()) {
+11 -5
View File
@@ -105,32 +105,38 @@ export function anonymizeContent(content: string, repository: Repository) {
); );
} }
for (let term of repository.options.terms || []) { const terms = repository.options.terms || [];
for (let i = 0; i < terms.length; i++) {
const term = terms[i];
if (term.trim() == "") { if (term.trim() == "") {
continue; continue;
} }
// remove whole url if it contains the term // remove whole url if it contains the term
content = content.replace(urlRegex, (match) => { content = content.replace(urlRegex, (match) => {
if (new RegExp(`\\b${term}\\b`, "gi").test(match)) if (new RegExp(`\\b${term}\\b`, "gi").test(match))
return config.ANONYMIZATION_MASK; return config.ANONYMIZATION_MASK + "-" + (i + 1);
return match; return match;
}); });
// remove the term in the text // remove the term in the text
content = content.replace( content = content.replace(
new RegExp(`\\b${term}\\b`, "gi"), new RegExp(`\\b${term}\\b`, "gi"),
config.ANONYMIZATION_MASK config.ANONYMIZATION_MASK + "-" + (i + 1)
); );
} }
return content; return content;
} }
export function anonymizePath(path: string, terms: string[]) { export function anonymizePath(path: string, terms: string[]) {
for (let term of terms) { for (let i = 0; i < terms.length; i++) {
const term = terms[i];
if (term.trim() == "") { if (term.trim() == "") {
continue; continue;
} }
path = path.replace(new RegExp(term, "gi"), config.ANONYMIZATION_MASK); path = path.replace(
new RegExp(term, "gi"),
config.ANONYMIZATION_MASK + "-" + (i + 1)
);
} }
return path; return path;
} }
+2 -1
View File
@@ -19,7 +19,8 @@ router.get(
try { try {
await repo.countView(); await repo.countView();
const f = new AnonymizedFile(repo, { const f = new AnonymizedFile({
repository: repo,
anonymizedPath, anonymizedPath,
}); });
if (!(await f.isFileSupported())) { if (!(await f.isFileSupported())) {
+2 -1
View File
@@ -34,7 +34,8 @@ async function webView(req: express.Request, res: express.Response) {
requestPath = path.join(requestPath, "index.html"); requestPath = path.join(requestPath, "index.html");
} }
requestPath = requestPath; requestPath = requestPath;
const f = new AnonymizedFile(repo, { const f = new AnonymizedFile({
repository: repo,
anonymizedPath: requestPath, anonymizedPath: requestPath,
}); });
if (!(await f.isFileSupported())) { if (!(await f.isFileSupported())) {
+1 -1
View File
@@ -56,7 +56,7 @@ export default async function start() {
app.use("/github", rate, connection.router); app.use("/github", rate, connection.router);
// app routes // api routes
app.use("/api/user", rate, router.user); app.use("/api/user", rate, router.user);
app.use("/api/repo", rate, router.repositoryPublic); app.use("/api/repo", rate, router.repositoryPublic);
app.use("/api/repo", rate, router.file); app.use("/api/repo", rate, router.file);