mirror of
https://github.com/tdurieux/anonymous_github.git
synced 2026-02-13 10:52:53 +00:00
refactor AnonymizedFile
This commit is contained in:
@@ -2,71 +2,127 @@ import * as path from "path";
|
||||
import * as express from "express";
|
||||
import * as stream from "stream";
|
||||
import Repository from "./Repository";
|
||||
import { Tree, TreeFile } from "./types";
|
||||
import { Tree, TreeElement, TreeFile } from "./types";
|
||||
import storage from "./storage";
|
||||
import config from "../config";
|
||||
import { anonymizeStream } from "./anonymize-utils";
|
||||
import { anonymizePath, anonymizeStream } from "./anonymize-utils";
|
||||
|
||||
/**
 * Flatten a file tree into a lookup table from blob SHA to relative path.
 *
 * An entry is treated as a file when both its `sha` and its `size` are set.
 * Any other object entry is treated as a folder and walked recursively,
 * through its `child` property when it has one and through the entry itself
 * otherwise. When several files share the same SHA, the last one visited wins.
 *
 * @param tree the (sub)tree to walk
 * @param output accumulator receiving the `sha -> path` pairs; it is mutated and returned
 * @param parent path of `tree` relative to the root, prepended to every entry name
 * @returns `output`, filled with one entry per file found under `tree`
 */
function tree2sha(
  tree: any,
  output: { [key: string]: string } = {},
  parent: string = ""
): { [key: string]: string } {
  if (tree === null || typeof tree !== "object") {
    return output;
  }
  for (const name of Object.keys(tree)) {
    const entry = tree[name];
    // Only objects can be files or folders. Walking into a primitive (for
    // example the `sha` string of an entry that has no `size`) would recurse
    // forever on its one-character substrings.
    if (entry === null || typeof entry !== "object") {
      continue;
    }
    const entryPath = path.join(parent, name);
    if (entry.sha != null && entry.size != null) {
      output[entry.sha as string] = entryPath;
    } else {
      tree2sha(entry.child ? entry.child : entry, output, entryPath);
    }
  }
  return output;
}
|
||||
|
||||
/**
|
||||
* Represents a file in an anonymized repository
|
||||
*/
|
||||
export default class AnonymizedFile {
|
||||
repository: Repository;
|
||||
sha?: string;
|
||||
size?: number;
|
||||
path?: string;
|
||||
anonymizedPath: string;
|
||||
private _originalPath: string;
|
||||
private fileSize?: number;
|
||||
|
||||
constructor(
|
||||
repository: Repository,
|
||||
data: {
|
||||
path?: string;
|
||||
anonymizedPath: string;
|
||||
sha?: string;
|
||||
size?: number;
|
||||
}
|
||||
) {
|
||||
this.repository = repository;
|
||||
repository: Repository;
|
||||
anonymizedPath: string;
|
||||
sha?: string;
|
||||
|
||||
constructor(data: { repository: Repository; anonymizedPath: string }) {
|
||||
this.repository = data.repository;
|
||||
if (!this.repository.options.terms) throw new Error("terms_not_specified");
|
||||
this.anonymizedPath = data.anonymizedPath;
|
||||
if (data.path) {
|
||||
this.path = data.path;
|
||||
}
|
||||
|
||||
if (!data.anonymizedPath && this.path) {
|
||||
// anonymize the path
|
||||
this.anonymizedPath = this.path;
|
||||
for (let term of this.repository.options.terms) {
|
||||
if (term.trim() == "") {
|
||||
continue;
|
||||
}
|
||||
this.anonymizedPath = this.anonymizedPath.replace(
|
||||
new RegExp(term, "gi"),
|
||||
config.ANONYMIZATION_MASK
|
||||
);
|
||||
}
|
||||
}
|
||||
if (!this.sha) this.sha = data.sha;
|
||||
if (!this.size) this.size = data.size;
|
||||
}
|
||||
|
||||
async send(res: express.Response): Promise<void> {
|
||||
try {
|
||||
const s = await this.anonymizedContent();
|
||||
s.on("error", (err) => {
|
||||
console.log(err);
|
||||
res.status(500).send({ error: err.message });
|
||||
});
|
||||
s.pipe(res);
|
||||
} catch (error) {
|
||||
console.log("Error during anonymization", error);
|
||||
res.status(500).send({ error: error.message });
|
||||
/**
 * De-anonymize the path.
 *
 * Walks the anonymized tree and the original tree in parallel, one path
 * segment at a time. A segment that exists under the same name in the
 * original tree is used as is. Otherwise every sibling name of the original
 * folder is anonymized and compared with the segment. When exactly one
 * sibling matches, it is the original name. When none or several match, the
 * walk becomes ambiguous and the file is located afterwards through its SHA
 * inside the last original folder that was resolved.
 *
 * Side effects: stores the result in `_originalPath` (later calls return the
 * cached value) and sets `sha` and `fileSize` from the anonymized tree entry.
 *
 * @returns the original relative path of the file
 * @throws Error("path_not_specified") when `anonymizedPath` is empty
 * @throws Error("file_not_found") when a segment does not exist in the
 *   anonymized tree, or when an ambiguous path cannot be resolved by SHA
 * @throws Error("folder_not_supported") when the path does not designate a file
 */
async originalPath(): Promise<string> {
  if (this._originalPath) return this._originalPath;
  if (!this.anonymizedPath) throw new Error("path_not_specified");

  const segments = this.anonymizedPath.trim().split("/");

  // The segments come from the requested path, so only own, object-valued
  // entries are accepted as tree nodes. Plain indexing would also resolve
  // inherited members such as "__proto__" or "constructor".
  const childOf = (
    node: TreeElement,
    name: string
  ): TreeElement | undefined => {
    if (!Object.prototype.hasOwnProperty.call(node, name)) return undefined;
    const child = (node as Tree)[name];
    return child !== null && typeof child === "object" ? child : undefined;
  };

  let currentAnonymized: TreeElement =
    await this.repository.anonymizedFiles();
  let currentOriginal: TreeElement = await this.repository.files();
  let currentOriginalPath = "";
  let isAmbiguous = false;

  for (const fileName of segments) {
    // empty segments come from leading, trailing or doubled slashes
    if (fileName == "") {
      continue;
    }

    const anonymizedChild = childOf(currentAnonymized, fileName);
    if (!anonymizedChild) {
      throw new Error("file_not_found");
    }
    currentAnonymized = anonymizedChild;

    // once ambiguous, only the anonymized side keeps advancing
    if (isAmbiguous) {
      continue;
    }

    let originalName: string | undefined;
    if (childOf(currentOriginal, fileName)) {
      // the name was not altered by the anonymization
      originalName = fileName;
    } else {
      // anonymize every name of the folder and keep those matching the segment
      const options = Object.keys(currentOriginal).filter(
        (candidate) =>
          anonymizePath(candidate, this.repository.options.terms) == fileName
      );
      // a single match identifies the original name
      if (options.length == 1) {
        originalName = options[0];
      }
    }

    if (originalName === undefined) {
      isAmbiguous = true;
      continue;
    }
    currentOriginalPath = path.join(currentOriginalPath, originalName);
    currentOriginal = (currentOriginal as Tree)[originalName];
  }

  if (
    currentAnonymized.sha === undefined ||
    currentAnonymized.size === undefined
  ) {
    throw new Error("folder_not_supported");
  }

  const file: TreeFile = currentAnonymized as TreeFile;
  this.fileSize = file.size;
  this.sha = file.sha;

  if (isAmbiguous) {
    // Fallback (noted by the original author as "should never happen"):
    // search the last resolved original folder for a file with the same SHA.
    const shaTree = tree2sha(currentOriginal);
    if (!file.sha || !shaTree[file.sha]) {
      throw new Error("file_not_found");
    }
    this._originalPath = path.join(currentOriginalPath, shaTree[file.sha]);
  } else {
    this._originalPath = currentOriginalPath;
  }

  return this._originalPath;
}
|
||||
|
||||
async isFileSupported() {
|
||||
this.path = await this.getOriginalPath();
|
||||
const filename = path.basename(this.path);
|
||||
const filename = path.basename(await this.originalPath());
|
||||
const extensions = filename.split(".").reverse();
|
||||
const extension = extensions[0].toLowerCase();
|
||||
if (!this.repository.options.pdf && extension == "pdf") {
|
||||
@@ -85,16 +141,8 @@ export default class AnonymizedFile {
|
||||
return true;
|
||||
}
|
||||
|
||||
get originalCachePath() {
|
||||
if (!this.path) throw "path_not_defined";
|
||||
return path.join(
|
||||
this.repository.originalCachePath,
|
||||
this.path
|
||||
);
|
||||
}
|
||||
|
||||
async content(): Promise<stream.Readable> {
|
||||
if (this.size && this.size > config.MAX_FILE_SIZE) {
|
||||
if (this.fileSize && this.fileSize > config.MAX_FILE_SIZE) {
|
||||
throw new Error("file_too_big");
|
||||
}
|
||||
if (await storage.exists(this.originalCachePath)) {
|
||||
@@ -105,64 +153,27 @@ export default class AnonymizedFile {
|
||||
}
|
||||
|
||||
async anonymizedContent() {
|
||||
await this.getOriginalPath();
|
||||
if (!this.path) throw new Error("path_not_specified");
|
||||
if (!this.repository.options.terms) throw new Error("terms_not_specified");
|
||||
await this.originalPath();
|
||||
const rs = await this.content();
|
||||
const contentStream = rs.pipe(anonymizeStream(this.path, this.repository));
|
||||
return contentStream;
|
||||
return rs.pipe(anonymizeStream(await this.originalPath(), this.repository));
|
||||
}
|
||||
|
||||
/**
|
||||
* De-anonymize the path
|
||||
*
|
||||
* @returns the origin relative path of the file
|
||||
*/
|
||||
async getOriginalPath(): Promise<string> {
|
||||
if (!this.anonymizedPath) throw new Error("path_not_specified");
|
||||
/**
 * Location of the original (non-anonymized) file inside the repository cache.
 *
 * The original path is resolved asynchronously by `originalPath()`, which a
 * getter cannot await, so that method must have completed before this
 * property is read.
 *
 * @returns the repository cache path joined with the original relative path
 * @throws Error("path_not_defined") when the original path is not resolved yet
 */
get originalCachePath() {
  // Test the cached value: `this.originalPath` is a method, hence always truthy.
  if (!this._originalPath) throw new Error("path_not_defined");
  return path.join(this.repository.originalCachePath, this._originalPath);
}
|
||||
|
||||
const files = await this.repository.files();
|
||||
const paths = this.anonymizedPath.trim().split("/");
|
||||
|
||||
let current: any = await this.repository.anonymizedFiles();
|
||||
for (let i = 0; i < paths.length; i++) {
|
||||
const fileName = paths[i];
|
||||
if (fileName == "") {
|
||||
continue;
|
||||
}
|
||||
if (current[fileName]) {
|
||||
current = current[fileName];
|
||||
} else {
|
||||
throw new Error("file_not_found");
|
||||
}
|
||||
async send(res: express.Response): Promise<void> {
|
||||
try {
|
||||
const s = await this.anonymizedContent();
|
||||
s.on("error", (err) => {
|
||||
console.log(err);
|
||||
res.status(500).send({ error: err.message });
|
||||
});
|
||||
s.pipe(res);
|
||||
} catch (error) {
|
||||
console.log("Error during anonymization", error);
|
||||
res.status(500).send({ error: error.message });
|
||||
}
|
||||
|
||||
function tree2sha(
|
||||
tree: any,
|
||||
output: { [key: string]: string } = {},
|
||||
parent: string = ""
|
||||
): { [key: string]: string } {
|
||||
for (let i in tree) {
|
||||
const sha = tree[i].sha as string;
|
||||
const size = tree[i].size as number;
|
||||
if (sha != null && size != null) {
|
||||
output[sha] = path.join(parent, i);
|
||||
} else if (tree[i].child) {
|
||||
tree2sha(tree[i].child as Tree, output, path.join(parent, i));
|
||||
} else {
|
||||
tree2sha(tree[i] as Tree, output, path.join(parent, i));
|
||||
}
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
const shaTree = tree2sha(files);
|
||||
if (!current.sha || !shaTree[current.sha]) {
|
||||
throw new Error("file_not_found");
|
||||
}
|
||||
this.path = shaTree[current.sha];
|
||||
this.sha = current.sha;
|
||||
if ((current as TreeFile).size) this.size = (current as TreeFile).size;
|
||||
return this.path;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import * as path from "path";
|
||||
import storage from "./storage";
|
||||
import { RepositoryStatus, Source, Tree } from "./types";
|
||||
import { RepositoryStatus, Source, Tree, TreeElement, TreeFile } from "./types";
|
||||
import * as stream from "stream";
|
||||
import User from "./User";
|
||||
import GitHubStream from "./source/GitHubStream";
|
||||
@@ -43,23 +43,23 @@ export default class Repository {
|
||||
async anonymizedFiles(opt?: { force?: boolean }): Promise<Tree> {
|
||||
const terms = this._model.options.terms || [];
|
||||
|
||||
function anonymizeTreeRecursive(tree: Tree): any {
|
||||
if (Number.isInteger(tree.size)) {
|
||||
return tree;
|
||||
function anonymizeTreeRecursive(tree: TreeElement): TreeElement {
|
||||
if (Number.isInteger(tree.size) && tree.sha !== undefined) {
|
||||
return tree as TreeFile;
|
||||
}
|
||||
const output: any = {};
|
||||
let current: any = tree;
|
||||
if (current.child) {
|
||||
current = current.child;
|
||||
}
|
||||
for (const file in current) {
|
||||
const output: Tree = {};
|
||||
for (const file in tree) {
|
||||
const anonymizedPath = anonymizePath(file, terms);
|
||||
output[anonymizedPath] = anonymizeTreeRecursive(current[file]);
|
||||
if (output[anonymizedPath]) {
|
||||
// file anonymization conflict
|
||||
|
||||
}
|
||||
output[anonymizedPath] = anonymizeTreeRecursive(tree[file]);
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
return anonymizeTreeRecursive(await this.files(opt));
|
||||
return anonymizeTreeRecursive(await this.files(opt)) as Tree;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -85,6 +85,9 @@ export default class Repository {
|
||||
return files;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check the status of the repository
|
||||
*/
|
||||
check() {
|
||||
if (this._model.options.expirationMode != "never") {
|
||||
if (this._model.options.expirationDate > new Date()) {
|
||||
|
||||
@@ -37,7 +37,7 @@ export function anonymizeStream(filename: string, repository: Repository) {
|
||||
if (isTextFile(filename, data)) {
|
||||
data = anonymizeContent(data.toString(), repository);
|
||||
}
|
||||
|
||||
|
||||
chunks = [];
|
||||
len = 0;
|
||||
|
||||
@@ -105,32 +105,38 @@ export function anonymizeContent(content: string, repository: Repository) {
|
||||
);
|
||||
}
|
||||
|
||||
for (let term of repository.options.terms || []) {
|
||||
const terms = repository.options.terms || [];
|
||||
for (let i = 0; i < terms.length; i++) {
|
||||
const term = terms[i];
|
||||
if (term.trim() == "") {
|
||||
continue;
|
||||
}
|
||||
// remove whole url if it contains the term
|
||||
content = content.replace(urlRegex, (match) => {
|
||||
if (new RegExp(`\\b${term}\\b`, "gi").test(match))
|
||||
return config.ANONYMIZATION_MASK;
|
||||
return config.ANONYMIZATION_MASK + "-" + (i + 1);
|
||||
return match;
|
||||
});
|
||||
|
||||
// remove the term in the text
|
||||
content = content.replace(
|
||||
new RegExp(`\\b${term}\\b`, "gi"),
|
||||
config.ANONYMIZATION_MASK
|
||||
config.ANONYMIZATION_MASK + "-" + (i + 1)
|
||||
);
|
||||
}
|
||||
return content;
|
||||
}
|
||||
|
||||
export function anonymizePath(path: string, terms: string[]) {
|
||||
for (let term of terms) {
|
||||
for (let i = 0; i < terms.length; i++) {
|
||||
const term = terms[i];
|
||||
if (term.trim() == "") {
|
||||
continue;
|
||||
}
|
||||
path = path.replace(new RegExp(term, "gi"), config.ANONYMIZATION_MASK);
|
||||
path = path.replace(
|
||||
new RegExp(term, "gi"),
|
||||
config.ANONYMIZATION_MASK + "-" + (i + 1)
|
||||
);
|
||||
}
|
||||
return path;
|
||||
}
|
||||
|
||||
@@ -19,7 +19,8 @@ router.get(
|
||||
try {
|
||||
await repo.countView();
|
||||
|
||||
const f = new AnonymizedFile(repo, {
|
||||
const f = new AnonymizedFile({
|
||||
repository: repo,
|
||||
anonymizedPath,
|
||||
});
|
||||
if (!(await f.isFileSupported())) {
|
||||
|
||||
@@ -34,7 +34,8 @@ async function webView(req: express.Request, res: express.Response) {
|
||||
requestPath = path.join(requestPath, "index.html");
|
||||
}
|
||||
requestPath = requestPath;
|
||||
const f = new AnonymizedFile(repo, {
|
||||
const f = new AnonymizedFile({
|
||||
repository: repo,
|
||||
anonymizedPath: requestPath,
|
||||
});
|
||||
if (!(await f.isFileSupported())) {
|
||||
|
||||
@@ -56,7 +56,7 @@ export default async function start() {
|
||||
|
||||
app.use("/github", rate, connection.router);
|
||||
|
||||
// app routes
|
||||
// api routes
|
||||
app.use("/api/user", rate, router.user);
|
||||
app.use("/api/repo", rate, router.repositoryPublic);
|
||||
app.use("/api/repo", rate, router.file);
|
||||
|
||||
Reference in New Issue
Block a user