feat: flatten file tree for better performance

This commit is contained in:
tdurieux
2024-04-26 10:31:57 +01:00
parent ccdc95e4a8
commit 710f7328e7
23 changed files with 516 additions and 514 deletions

View File

@@ -1,6 +1,6 @@
<div class="container-fluid h-100">
<div class="row h-100">
<div class="leftCol shadow p-1 overflow-auto" ng-show="files">
<div class="leftCol shadow p-1 overflow-auto" ng-show="files.length">
<tree class="files" file="files"></tree>
<div class="bottom column">
<div

View File

@@ -270,30 +270,37 @@ angular
});
}
const toArray = function (obj) {
const toArray = function (arr) {
const output = [];
for (let name in obj) {
if (obj[name].size != null) {
const keys = { "": { child: output } };
for (let file of arr) {
let current = keys[file.path].child;
let fPath = `${file.path}/${file.name}`;
if (fPath.startsWith("/")) {
fPath = fPath.substring(1);
}
if (file.size != null) {
// it is a file
output.push({
name,
size: obj[name].size,
sha: obj[name].sha,
current.push({
name: file.name,
size: file.size,
sha: file.sha,
});
} else {
output.push({
name,
sha: obj[name].sha,
child: obj[name],
});
const dir = {
name: file.name,
child: [],
};
keys[fPath] = dir;
current.push(dir);
}
}
return output;
};
const sortFiles = (f1, f2) => {
const f1d = isDir(f1.child);
const f2d = isDir(f2.child);
const f1d = !!f1.child;
const f2d = !!f2.child;
if (f1d && f2d) {
return f1.name - f2.name;
}
@@ -307,22 +314,24 @@ angular
};
function generate(current, parentPath) {
const afiles = toArray(current).sort(sortFiles);
if (!current) return "";
current = current.sort(sortFiles);
const afiles = current;
let output = "<ul>";
for (let f of afiles) {
let dir = isDir(f.child);
let dir = !!f.child;
let name = f.name;
let size = f.size || 0;
if (dir) {
let test = name;
current = toArray(f.child);
while (current.length == 1) {
current = f.child;
while (current && current.length == 1) {
test += "/" + current[0].name;
size = current[0].size;
current = toArray(current[0].child);
current = current[0].child;
}
name = test;
if (current.length == 0) {
if (size > 0) {
dir = false;
}
}
@@ -332,15 +341,27 @@ angular
size = "";
}
const path = `${parentPath}/${name}`;
output += `<li class="file ${
dir ? "folder" : ""
}" ng-class="{active: isActive('${path}'), open: opens['${path}']}" title="${size}">`;
const cssClasses = ["file"];
if (dir) {
cssClasses.push("folder");
}
if ($scope.opens[path]) {
cssClasses.push("open");
}
if ($scope.isActive(path)) {
cssClasses.push("active");
}
output += `<li class="${cssClasses.join(
" "
)}" ng-class="{active: isActive('${path}'), open: opens['${path}']}" title="${size}">`;
if (dir) {
output += `<a ng-click="openFolder('${path}', $event)">${name}</a>`;
} else {
output += `<a href='/r/${$scope.repoId}${path}'>${name}</a>`;
}
if ($scope.opens[path]) {
if ($scope.opens[path] && f.child && f.child.length > 1) {
output += generate(f.child, parentPath + "/" + f.name);
}
// output += generate(f.child, parentPath + "/" + f.name);
@@ -349,44 +370,36 @@ angular
return output + "</ul>";
}
function display() {
const output = generate($scope.file, "");
$element.html("");
const output = generate(toArray($scope.file).sort(sortFiles), "");
$compile(output)($scope, (clone) => {
$element.append(clone);
});
}
$scope.$watch("file", (newValue) => {
if (newValue == null) return;
if (Array.isArray(newValue)) return;
if (Object.keys(newValue).length == 0) {
return $element.html("Empty repository");
}
display();
});
$scope.$watch(
"file",
(newValue) => {
if (newValue == null) return;
if (newValue.length == 0) {
return $element.html("Empty repository");
}
display();
},
true
);
$scope.isActive = function (name) {
return $routeParams.path == name.substring(1);
};
$scope.openFolder = function (folder, event) {
$scope.openFolder = async function (folder, event) {
$scope.opens[folder] = !$scope.opens[folder];
if (event.srcElement.nextSibling == null) {
const folders = folder.substring(1).split("/");
let current = $scope.file;
for (let folder of folders) {
current = current[folder];
}
$compile(generate(current, folder))($scope, (clone) => {
angular.element(event.srcElement.parentNode).append(clone);
});
await $scope.$parent.getFiles(folder.substring(1));
display();
}
};
const isFile = function (child) {
return child == null || child.size != null;
};
const isDir = function (child) {
return !isFile(child);
};
},
],
};
@@ -1177,7 +1190,7 @@ angular
}
await $scope.getBranches();
} catch (error) {
console.log("here", error);
console.log(error);
if (error.data) {
$translate("ERRORS." + error.data.error).then((translation) => {
const toast = {
@@ -1474,6 +1487,7 @@ angular
"$sce",
"PDFViewerService",
function ($scope, $http, $location, $routeParams, $sce, PDFViewerService) {
$scope.files = [];
const extensionModes = {
yml: "yaml",
txt: "text",
@@ -1526,6 +1540,9 @@ angular
});
function selectFile() {
if ($scope.paths[0] != "") {
return;
}
const readmePriority = [
"readme.md",
"readme.txt",
@@ -1533,21 +1550,10 @@ angular
"readme.1st",
"readme",
];
// find current folder
let currentFolder = $scope.files;
for (const p of $scope.paths) {
if (currentFolder[p]) {
currentFolder = currentFolder[p];
}
}
if (currentFolder.size && Number.isInteger(currentFolder.size)) {
// a file is already selected
return;
}
const readmeCandidates = {};
for (const file in currentFolder) {
if (file.toLowerCase().indexOf("readme") > -1) {
readmeCandidates[file.toLowerCase()] = file;
for (const file of $scope.files) {
if (file.name.toLowerCase().indexOf("readme") > -1) {
readmeCandidates[file.name.toLowerCase()] = file.name;
}
}
let best_match = null;
@@ -1569,36 +1575,29 @@ angular
$location.url(uri + readmeCandidates[best_match]);
}
}
function getFiles(callback) {
$http.get(`/api/repo/${$scope.repoId}/files/`).then(
(res) => {
$scope.files = res.data;
selectFile();
if (callback) {
return callback();
}
},
(err) => {
$scope.type = "error";
$scope.content = err.data.error;
$scope.files = null;
}
);
}
/**
 * Request the file list for `path` from the API and merge it into
 * `$scope.files`.
 *
 * @param path folder path sent as the `path` query parameter ("" for the root)
 * @returns the entries returned by the API, or undefined when the request
 *          failed (in which case the scope is switched to the error state
 *          and `$scope.files` is reset to an empty list)
 */
$scope.getFiles = async function (path) {
  try {
    // Encode the path so that characters such as "&", "#", "+" or spaces in
    // a folder name are not interpreted as query-string syntax.
    const res = await $http.get(
      `/api/repo/${$scope.repoId}/files/?path=${encodeURIComponent(path)}`
    );
    // The same folder can be requested more than once (e.g. when a folder is
    // toggled again), so only append entries that are not already known.
    const fileKey = (f) => `${f.path}/${f.name}`;
    const known = new Set($scope.files.map(fileKey));
    for (const file of res.data) {
      const key = fileKey(file);
      if (!known.has(key)) {
        known.add(key);
        $scope.files.push(file);
      }
    }
    return res.data;
  } catch (err) {
    $scope.type = "error";
    $scope.content = err.data.error;
    $scope.files = [];
  }
};
function getSelectedFile() {
let currentFolder = $scope.files;
for (const p of $scope.paths) {
if (currentFolder[p]) {
currentFolder = currentFolder[p];
} else {
return null;
}
}
return currentFolder;
return $scope.files.filter(
(f) =>
f.name == $scope.paths[$scope.paths.length - 1] &&
f.path == $scope.paths.slice(0, $scope.paths.length - 1).join("/")
)[0];
}
async function getOptions(callback) {
function getOptions(callback) {
$http.get(`/api/repo/${$scope.repoId}/options`).then(
(res) => {
$scope.options = res.data;
@@ -1835,8 +1834,13 @@ angular
$scope.filePath = $routeParams.path || "";
$scope.paths = $scope.filePath.split("/");
getOptions((options) => {
getFiles(() => {
getOptions(async (options) => {
for (let i = 0; i < $scope.paths.length; i++) {
const path = i > 0 ? $scope.paths.slice(0, i).join("/") : "";
await $scope.getFiles(path);
}
$scope.$apply(() => {
selectFile();
updateContent();
});
});

File diff suppressed because one or more lines are too long

View File

@@ -1,28 +1,27 @@
import { join, basename } from "path";
import { join, basename, dirname } from "path";
import { Response } from "express";
import { Readable } from "stream";
import { trace } from "@opentelemetry/api";
import { lookup } from "mime-types";
import CacheableLookup from "cacheable-lookup";
import got from "got";
import Repository from "./Repository";
import { RepositoryStatus, Tree, TreeElement, TreeFile } from "./types";
import { RepositoryStatus } from "./types";
import config from "../config";
import { anonymizePath, isTextFile } from "./anonymize-utils";
import AnonymousError from "./AnonymousError";
import { handleError } from "../server/routes/route-utils";
import got from "got";
import FileModel from "./model/files/files.model";
import { IFile } from "./model/files/files.types";
/**
* Represents a file in an anonymized repository
*/
export default class AnonymizedFile {
private _originalPath: string | undefined;
private fileSize?: number;
repository: Repository;
anonymizedPath: string;
_sha?: string;
private _file?: IFile | null;
constructor(data: { repository: Repository; anonymizedPath: string }) {
this.repository = data.repository;
@@ -35,16 +34,87 @@ export default class AnonymizedFile {
}
async sha() {
return trace.getTracer("ano-file").startActiveSpan("sha", async (span) => {
try {
span.setAttribute("anonymizedPath", this.anonymizedPath);
if (this._sha) return this._sha.replace(/"/g, "");
await this.originalPath();
return this._sha?.replace(/"/g, "");
} finally {
span.end();
return trace
.getTracer("ano-file")
.startActiveSpan("AnnoFile.sha", async (span) => {
try {
span.setAttribute("anonymizedPath", this.anonymizedPath);
if (this._file) return this._file.sha?.replace(/"/g, "");
this._file = await this.getFileInfo();
return this._file.sha?.replace(/"/g, "");
} finally {
span.end();
}
});
}
async getFileInfo(): Promise<IFile> {
const span = trace.getTracer("ano-file").startSpan("AnnoFile.getFileInfo");
span.setAttribute("repoId", this.repository.repoId);
span.setAttribute("file", this.anonymizedPath);
try {
if (this._file) return this._file;
let fileDir = dirname(this.anonymizedPath);
if (fileDir == ".") fileDir = "";
const filename = basename(this.anonymizedPath);
if (!this.anonymizedPath.includes(config.ANONYMIZATION_MASK)) {
const res = await FileModel.findOne({
repoId: this.repository.repoId,
path: fileDir,
name: filename,
});
if (res) {
this._file = res;
return res;
}
throw new AnonymousError("file_not_found", {
object: this,
httpStatus: 404,
});
}
});
const pathQuery = fileDir
.split("/")
.map((p) => {
if (p.includes(config.ANONYMIZATION_MASK)) {
return "[^/]+";
}
return p;
})
.join("/");
const nameQuery = filename.replace(
new RegExp(config.ANONYMIZATION_MASK + "(-[0-9]+)?"),
"[^/]+"
);
const candidates = await FileModel.find({
repoId: this.repository.repoId,
path: new RegExp(pathQuery),
name: new RegExp(nameQuery),
}).exec();
for (const candidate of candidates) {
const candidatePath = join(candidate.path, candidate.name);
if (
anonymizePath(candidatePath, this.repository.options.terms || []) ==
this.anonymizedPath
) {
this._file = candidate;
return candidate;
}
}
throw new AnonymousError("file_not_found", {
object: this,
httpStatus: 404,
});
} catch (error) {
span.recordException(error as Error);
throw error;
} finally {
span.end();
}
}
/**
@@ -53,102 +123,24 @@ export default class AnonymizedFile {
* @returns the origin relative path of the file
*/
async originalPath(): Promise<string> {
return trace
.getTracer("ano-file")
.startActiveSpan("originalPath", async (span) => {
try {
span.setAttribute("anonymizedPath", this.anonymizedPath);
if (this._originalPath) return this._originalPath;
if (!this.anonymizedPath) {
throw new AnonymousError("path_not_specified", {
object: this,
httpStatus: 400,
});
}
let currentOriginal = (await this.repository.files({
force: false,
})) as TreeElement;
const paths = this.anonymizedPath.trim().split("/");
let currentOriginalPath = "";
for (let i = 0; i < paths.length; i++) {
const fileName = paths[i];
if (fileName == "") {
continue;
}
if (!(currentOriginal as Tree)[fileName]) {
// anonymize all the file in the folder and check if there is one that match the current filename
const options = [];
for (let originalFileName in currentOriginal) {
if (
anonymizePath(
originalFileName,
this.repository.options.terms
) == fileName
) {
options.push(originalFileName);
}
}
// if only one option we found the original filename
if (options.length == 1) {
currentOriginalPath = join(currentOriginalPath, options[0]);
currentOriginal = (currentOriginal as Tree)[options[0]];
} else if (options.length == 0) {
throw new AnonymousError("file_not_found", {
object: this,
httpStatus: 404,
});
} else {
const nextName = paths[i + 1];
if (!nextName) {
// if there is no next name we can't find the file and we return the first option
currentOriginalPath = join(currentOriginalPath, options[0]);
currentOriginal = (currentOriginal as Tree)[options[0]];
}
let found = false;
for (const option of options) {
const optionTree = (currentOriginal as Tree)[option];
if ((optionTree as Tree).child) {
const optionTreeChild = (optionTree as Tree).child;
if ((optionTreeChild as Tree)[nextName]) {
currentOriginalPath = join(currentOriginalPath, option);
currentOriginal = optionTreeChild;
found = true;
break;
}
}
}
if (!found) {
// if we didn't find the next name we return the first option
currentOriginalPath = join(currentOriginalPath, options[0]);
currentOriginal = (currentOriginal as Tree)[options[0]];
}
}
} else {
currentOriginalPath = join(currentOriginalPath, fileName);
currentOriginal = (currentOriginal as Tree)[fileName];
}
}
if (
currentOriginal.sha === undefined ||
currentOriginal.size === undefined
) {
throw new AnonymousError("folder_not_supported", { object: this });
}
const file = currentOriginal as TreeFile;
this.fileSize = file.size;
this._sha = file.sha;
this._originalPath = currentOriginalPath;
return this._originalPath;
} finally {
span.end();
}
});
const span = trace.getTracer("ano-file").startSpan("AnnoFile.originalPath");
span.setAttribute("repoId", this.repository.repoId);
span.setAttribute("file", this.anonymizedPath);
try {
span.setAttribute("anonymizedPath", this.anonymizedPath);
if (!this.anonymizedPath) {
throw new AnonymousError("path_not_specified", {
object: this,
httpStatus: 400,
});
}
if (!this._file) {
this._file = await this.getFileInfo();
}
return join(this._file.path, this._file.name);
} finally {
span.end();
}
}
extension() {
const filename = basename(this.anonymizedPath);
@@ -194,7 +186,7 @@ export default class AnonymizedFile {
await this.originalPath();
}
span.addEvent("filePath", { originalPath: this.filePath });
if (this.fileSize && this.fileSize > config.MAX_FILE_SIZE) {
if (this._file?.size && this._file?.size > config.MAX_FILE_SIZE) {
throw new AnonymousError("file_too_big", {
object: this,
httpStatus: 403,
@@ -229,16 +221,16 @@ export default class AnonymizedFile {
});
}
const cacheableLookup = new CacheableLookup();
const hostName = new URL(config.STREAMER_ENTRYPOINT).hostname;
const ipHost = await cacheableLookup.lookupAsync(hostName);
// const cacheableLookup = new CacheableLookup();
// const hostName = new URL(config.STREAMER_ENTRYPOINT).hostname;
// const ipHost = await cacheableLookup.lookupAsync(hostName);
// use the streamer service
return got.stream(join(config.STREAMER_ENTRYPOINT, "api"), {
method: "POST",
lookup: cacheableLookup.lookup,
host: ipHost.address,
dnsCache: cacheableLookup,
// lookup: cacheableLookup.lookup,
// host: ipHost.address,
// dnsCache: cacheableLookup,
json: {
token: await this.repository.getToken(),
repoFullName: this.repository.model.source.repositoryName,
@@ -253,7 +245,7 @@ export default class AnonymizedFile {
}
get filePath() {
if (!this._originalPath) {
if (!this._file) {
if (this.anonymizedPath.includes(config.ANONYMIZATION_MASK)) {
throw new AnonymousError("path_not_defined", {
object: this,
@@ -263,9 +255,13 @@ export default class AnonymizedFile {
return this.anonymizedPath;
}
return this._originalPath;
return join(this._file.path, this._file.name);
}
// cacheableLookup = new CacheableLookup({
// maxTtl: 60,
// });
async send(res: Response): Promise<void> {
const anonymizer = this.repository.generateAnonymizeTransformer(
this.anonymizedPath
@@ -283,15 +279,15 @@ export default class AnonymizedFile {
this.sha(),
this.repository.getToken(),
]);
const cacheableLookup = new CacheableLookup();
const hostName = new URL(config.STREAMER_ENTRYPOINT).hostname;
const ipHost = await cacheableLookup.lookupAsync(hostName);
// const hostName = new URL(config.STREAMER_ENTRYPOINT).hostname;
// const ipHost = await this.cacheableLookup.lookupAsync(hostName);
// console.timeLog("streamer"+ this.anonymizedPath, "got ip");
got
.stream(join(config.STREAMER_ENTRYPOINT, "api"), {
method: "POST",
lookup: cacheableLookup.lookup,
host: ipHost.address,
dnsCache: cacheableLookup,
// lookup: this.cacheableLookup.lookup,
// host: ipHost.address,
// dnsCache: this.cacheableLookup,
json: {
sha,
token,
@@ -331,9 +327,9 @@ export default class AnonymizedFile {
if (!mime && data.isText) {
res.contentType("text/plain");
}
if (!data.wasAnonimized && this.fileSize) {
if (!data.wasAnonimized && this._file?.size) {
// the text files may be anonymized and therefore the size may be different
res.header("Content-Length", this.fileSize.toString());
res.header("Content-Length", this._file?.size.toString());
}
});
const content = await this.content();

View File

@@ -1,5 +1,5 @@
import storage from "./storage";
import { RepositoryStatus, Tree, TreeElement, TreeFile } from "./types";
import { RepositoryStatus } from "./types";
import { Readable } from "stream";
import * as sha1 from "crypto-js/sha1";
import User from "./User";
@@ -16,7 +16,6 @@ import ConferenceModel from "./model/conference/conferences.model";
import AnonymousError from "./AnonymousError";
import { downloadQueue } from "../queue";
import { isConnected } from "../server/database";
import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model";
import {
getRepositoryFromGitHub,
GitHubRepository,
@@ -25,9 +24,12 @@ import { trace } from "@opentelemetry/api";
import { getToken } from "./GitHubUtils";
import { FILE_TYPE } from "./storage/Storage";
import config from "../config";
import FileModel from "./model/files/files.model";
import { IFile } from "./model/files/files.types";
import { join } from "path";
import AnonymizedFile from "./AnonymizedFile";
function anonymizeTreeRecursive(
tree: TreeElement,
tree: IFile[],
terms: string[],
opt: {
/** Include the file sha in the response */
@@ -35,24 +37,21 @@ function anonymizeTreeRecursive(
} = {
includeSha: false,
}
): TreeElement {
if (typeof tree.size !== "object" && tree.sha !== undefined) {
if (opt?.includeSha) return tree as TreeFile;
): Partial<IFile>[] {
return tree.map((file) => {
return {
size: tree.size,
sha: sha1(tree.sha as string).toString(),
} as TreeFile;
}
const output: Tree = {};
Object.getOwnPropertyNames(tree).forEach((file) => {
const anonymizedPath = anonymizePath(file, terms);
output[anonymizedPath] = anonymizeTreeRecursive(
(tree as Tree)[file],
terms,
opt
);
name: anonymizePath(file.name, terms),
path: anonymizePath(file.path, terms),
size: file.size,
sha: opt.includeSha
? file.sha
: file.size
? sha1(file.sha || "")
.toString()
.substring(0, 8)
: undefined,
};
});
return output;
}
export default class Repository {
@@ -124,13 +123,16 @@ export default class Repository {
force?: boolean;
/** Include the file sha in the response */
includeSha: boolean;
recursive?: boolean;
path?: string;
} = {
force: false,
includeSha: false,
recursive: true,
}
): Promise<Tree> {
): Promise<Partial<IFile>[]> {
const terms = this._model.options.terms || [];
return anonymizeTreeRecursive(await this.files(opt), terms, opt) as Tree;
return anonymizeTreeRecursive(await this.files(opt), terms, opt);
}
/**
@@ -140,32 +142,81 @@ export default class Repository {
* @returns The file tree
*/
async files(
opt: { force?: boolean; progress?: (status: string) => void } = {
opt: {
recursive?: boolean;
path?: string;
force?: boolean;
progress?: (status: string) => void;
} = {
recursive: true,
force: false,
}
): Promise<Tree> {
): Promise<IFile[]> {
const span = trace.getTracer("ano-file").startSpan("Repository.files");
span.setAttribute("repoId", this.repoId);
try {
if (!this._model.originalFiles && !opt.force) {
const res = await AnonymizedRepositoryModel.findById(this._model._id, {
originalFiles: 1,
const hasFile = await FileModel.exists({ repoId: this.repoId }).exec();
if (!hasFile || opt.force) {
await FileModel.deleteMany({ repoId: this.repoId }).exec();
const files = await this.source.getFiles(opt.progress);
files.forEach((f) => (f.repoId = this.repoId));
await FileModel.insertMany(files);
this._model.size = { storage: 0, file: 0 };
await this.computeSize();
}
if (opt.path?.includes(config.ANONYMIZATION_MASK)) {
const f = new AnonymizedFile({
repository: this,
anonymizedPath: opt.path,
});
if (!res) throw new AnonymousError("repository_not_found");
this.model.originalFiles = res.originalFiles;
// Resolve the anonymized path back to the original path before querying.
// The AnonymizedFile instance is deliberately not logged: it references the
// whole repository object and would dump it into the server log.
opt.path = await f.originalPath();
// const anoPath = opt.path.split(config.ANONYMIZATION_MASK);
// let beforePath = anoPath[0];
// if (beforePath.endsWith("/")) {
// beforePath = beforePath.substring(0, beforePath.length - 1);
// }
// let afterPath =
// anoPath[1].indexOf("/") > -1
// ? anoPath[1].substring(anoPath[1].indexOf("/") + 1)
// : "";
// const anoTerm = opt.path.substring(
// opt.path.indexOf(config.ANONYMIZATION_MASK),
// afterPath ? opt.path.indexOf(afterPath) - 1 : undefined
// );
// const candidates = await FileModel.find({
// repoId: this.repoId,
// path: new RegExp(`^${beforePath}$`),
// }).exec();
// let found = false;
// for (const candidate of candidates) {
// const p = anonymizePath(
// candidate.name,
// this._model.options.terms || []
// );
// if (p == anoTerm) {
// opt.path = join(beforePath, candidate.name, afterPath);
// found = true;
// }
// }
// if (found === false) {
// throw new AnonymousError("path_not_found");
// }
}
if (
this._model.originalFiles &&
Object.getOwnPropertyNames(this._model.originalFiles).length !== 0 &&
!opt.force
) {
return this._model.originalFiles;
// The path comes from the caller, so escape regular-expression
// metacharacters: otherwise a folder such as "c++" makes `new RegExp`
// throw, and characters like "." match more than the literal path.
const escapedPath = opt.path?.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
// Recursive listing: every entry whose path starts with the requested path
// (no filter value when no path is given).
let pathQuery: string | RegExp | undefined = escapedPath
  ? new RegExp(`^${escapedPath}`)
  : undefined;
if (opt.recursive === false) {
  // Non-recursive listing: only entries whose path is exactly the requested
  // path; the empty string selects the entries stored with an empty path.
  pathQuery = escapedPath ? new RegExp(`^${escapedPath}$`) : "";
}
const files = await this.source.getFiles(opt.progress);
this._model.originalFiles = files;
this._model.size = { storage: 0, file: 0 };
await this.computeSize();
return files;
return await FileModel.find({
repoId: this.repoId,
path: pathQuery,
}).exec();
} finally {
span.end();
}
@@ -379,6 +430,7 @@ export default class Repository {
span.end();
return;
}
this.model.increment();
await this.updateStatus(RepositoryStatus.DOWNLOAD);
await this.files({
force: false,
@@ -461,12 +513,14 @@ export default class Repository {
span.setAttribute("repoId", this.repoId);
// remove attribute
this._model.size = { storage: 0, file: 0 };
this._model.originalFiles = undefined;
if (status) {
await this.updateStatus(status, statusMessage);
}
// remove cache
await this.removeCache();
// Drop the stored file entries and the cached content in parallel.
await Promise.all([
  // The schema field is `repoId`; a misspelled key would not restrict the
  // deletion to the files of this repository.
  FileModel.deleteMany({ repoId: this.repoId }).exec(),
  this.removeCache(),
]);
console.log(`[RESET] ${this._model.repoId} has been reset`);
span.end();
}
@@ -514,24 +568,24 @@ export default class Repository {
if (this.status !== RepositoryStatus.READY)
return { storage: 0, file: 0 };
if (this._model.size.file) return this._model.size;
function recursiveCount(files: Tree): { storage: number; file: number } {
const out = { storage: 0, file: 0 };
for (const name in files) {
const file = files[name];
if (file.size && parseInt(file.size.toString()) == file.size) {
out.storage += file.size as number;
out.file++;
} else if (typeof file == "object") {
const r = recursiveCount(file as Tree);
out.storage += r.storage;
out.file += r.file;
}
}
return out;
}
const files = await this.files();
this._model.size = recursiveCount(files);
const res = await FileModel.aggregate([
{
$match: {
repoId: this.repoId,
},
},
{
$group: {
_id: "$repoId",
storage: { $sum: "$size" },
file: { $sum: 1 },
},
},
]);
this._model.size = {
storage: res[0]?.storage || 0,
file: res[0]?.file || 0,
};
if (isConnected) {
await this._model.save();
}

View File

@@ -34,7 +34,6 @@ const AnonymizedRepositorySchema = new Schema({
type: Boolean,
default: false,
},
originalFiles: Schema.Types.Mixed,
options: {
terms: [String],
expirationMode: { type: String },

View File

@@ -1,5 +1,5 @@
import { Document, Model } from "mongoose";
import { RepositoryStatus, Tree } from "../../types";
import { RepositoryStatus } from "../../types";
export interface IAnonymizedRepository {
repoId: string;
@@ -11,14 +11,13 @@ export interface IAnonymizedRepository {
type: "GitHubDownload" | "GitHubStream" | "Zip";
branch?: string;
commit?: string;
commitDate?: Date,
commitDate?: Date;
repositoryId?: string;
repositoryName?: string;
accessToken?: string;
};
owner: string;
truckedFileList: boolean;
originalFiles?: Tree;
conference: string;
options: {
terms: string[];

View File

@@ -0,0 +1,8 @@
import { model } from "mongoose";
import { join } from "path";
import { IFileDocument, IFileModel } from "./files.types";
import FileSchema from "./files.schema";
/** Name under which the file model is registered with mongoose. */
const FILE_MODEL_NAME = "File";

/** Mongoose model giving access to the file entries described by FileSchema. */
const FileModel = model<IFileDocument>(
  FILE_MODEL_NAME,
  FileSchema
) as IFileModel;

export default FileModel;

View File

@@ -0,0 +1,19 @@
import { Schema } from "mongoose";
/** Build a fresh definition for an indexed string field. */
const indexedString = () => ({ type: String, index: true });

/**
 * Schema of one file-tree entry: its name, the path of its parent folder,
 * the repository it belongs to, and its optional sha and size.
 */
const FileSchema = new Schema({
  name: indexedString(),
  path: indexedString(),
  repoId: indexedString(),
  sha: { type: String },
  size: { type: Number },
});

/** Render the entry as "<path>/<name>". */
FileSchema.methods.toString = function () {
  return "".concat(this.path, "/", this.name);
};

export default FileSchema;

View File

@@ -0,0 +1,14 @@
import { Document, Model } from "mongoose";
/**
 * One entry of a repository file listing, stored as a flat record
 * (one record per entry) instead of a nested tree.
 */
export interface IFile {
/** Last segment of the entry's path (its base name). */
name: string;
/** Path of the parent folder; the empty string for entries at the root. */
path: string;
/** Identifier of the repository the entry belongs to. */
repoId: string;
/** Sha of the entry, when known. */
sha?: string;
/**
 * Size of the entry, when known.
 * NOTE(review): the web client treats entries without a size as folders —
 * confirm that every source leaves it unset for folders.
 */
size?: number;
}
/** Mongoose document holding an IFile. */
export interface IFileDocument extends IFile, Document {
/** String form of the entry (the schema implements it as "<path>/<name>"). */
toString: (this: IFileDocument) => string;
}
/** Mongoose model type for IFileDocument; declares no extra statics. */
export interface IFileModel extends Model<IFileDocument> {}

View File

@@ -1,8 +1,8 @@
import { Readable } from "stream";
import AnonymizedFile from "../AnonymizedFile";
import { Tree } from "../types";
import { SourceBase } from "./Source";
import { IFile } from "../model/files/files.types";
export interface GitHubBaseData {
getToken: () => string | Promise<string>;
@@ -23,5 +23,5 @@ export default abstract class GitHubBase implements SourceBase {
progress?: (status: string) => void
): Promise<Readable>;
abstract getFiles(progress?: (status: string) => void): Promise<Tree>;
abstract getFiles(progress?: (status: string) => void): Promise<IFile[]>;
}

View File

@@ -1,16 +1,17 @@
import AnonymizedFile from "../AnonymizedFile";
import GitHubBase, { GitHubBaseData } from "./GitHubBase";
import storage from "../storage";
import { Tree } from "../types";
import * as path from "path";
import got from "got";
import { basename, dirname } from "path";
import * as stream from "stream";
import AnonymousError from "../AnonymousError";
import config from "../../config";
import { trace } from "@opentelemetry/api";
import { FILE_TYPE } from "../storage/Storage";
import { octokit } from "../GitHubUtils";
import FileModel from "../model/files/files.model";
import { IFile } from "../model/files/files.types";
export default class GitHubStream extends GitHubBase {
type: "GitHubDownload" | "GitHubStream" | "Zip" = "GitHubStream";
@@ -29,6 +30,7 @@ export default class GitHubStream extends GitHubBase {
repo: this.data.repoName,
file_sha: sha,
});
console.log("[GHStream] Downloading file", url);
return got.stream(url, {
headers: {
"X-GitHub-Api-Version": "2022-11-28",
@@ -132,61 +134,17 @@ export default class GitHubStream extends GitHubBase {
const span = trace.getTracer("ano-file").startSpan("GHStream.getFiles");
span.setAttribute("repoId", this.data.repoId);
try {
return this.getTree(this.data.commit, progress);
return this.getTruncatedTree(this.data.commit, progress);
} finally {
span.end();
}
}
private async getTree(
private async getGHTree(
sha: string,
progress?: (status: string) => void,
truncatedTree: Tree = {},
parentPath: string = "",
count = {
file: 0,
request: 0,
}
count = { request: 0, file: 0 },
opt = { recursive: true, callback: () => {} }
) {
const span = trace.getTracer("ano-file").startSpan("GHStream.getTree");
span.setAttribute("sha", sha);
let ghRes: Awaited<ReturnType<typeof this.getGHTree>>;
try {
count.request++;
ghRes = await this.getGHTree(sha, { recursive: true });
} catch (error) {
span.recordException(error as Error);
if ((error as any).status == 409) {
// cannot be empty otherwise it would try to download it again
span.end();
return { __: {} };
} else {
const err = new AnonymousError("repo_not_accessible", {
httpStatus: (error as any).status,
cause: error as Error,
object: {
tree_sha: sha,
},
});
span.recordException(err);
span.end();
throw err;
}
}
const tree = this.tree2Tree(ghRes.tree, truncatedTree, parentPath);
count.file += ghRes.tree.length;
if (progress) {
progress("List file: " + count.file);
}
if (ghRes.truncated) {
await this.getTruncatedTree(sha, progress, tree, parentPath, count);
}
span.end();
return tree;
}
private async getGHTree(sha: string, opt = { recursive: true }) {
const span = trace.getTracer("ano-file").startSpan("GHStream.getGHTree");
span.setAttribute("sha", sha);
try {
@@ -195,8 +153,13 @@ export default class GitHubStream extends GitHubBase {
owner: this.data.organization,
repo: this.data.repoName,
tree_sha: sha,
recursive: opt.recursive ? "1" : undefined,
recursive: opt.recursive === true ? "1" : undefined,
});
count.request++;
count.file += ghRes.data.tree.length;
if (opt.callback) {
opt.callback();
}
return ghRes.data;
} finally {
span.end();
@@ -206,68 +169,59 @@ export default class GitHubStream extends GitHubBase {
private async getTruncatedTree(
sha: string,
progress?: (status: string) => void,
truncatedTree: Tree = {},
parentPath: string = "",
count = {
file: 0,
request: 0,
},
depth = 0
parentPath: string = ""
) {
const count = {
request: 0,
file: 0,
};
const span = trace
.getTracer("ano-file")
.startSpan("GHStream.getTruncatedTree");
span.setAttribute("sha", sha);
span.setAttribute("parentPath", parentPath);
const output: IFile[] = [];
try {
count.request++;
let data = null;
try {
data = await this.getGHTree(sha, {
data = await this.getGHTree(sha, count, {
recursive: false,
callback: () => {
if (progress) {
progress("List file: " + count.file);
}
},
});
this.tree2Tree(data.tree, truncatedTree, parentPath);
output.push(...this.tree2Tree(data.tree, parentPath));
} catch (error) {
span.recordException(error as Error);
return;
throw new AnonymousError("files_not_found", {
httpStatus: 404,
object: this.data,
cause: error as Error,
});
}
count.file += data.tree.length;
if (progress) {
progress("List file: " + count.file);
}
if (data.tree.length < 100 && count.request < 200) {
const promises: Promise<any>[] = [];
for (const file of data.tree) {
if (file.type == "tree" && file.path && file.sha) {
const elementPath = path.join(parentPath, file.path);
promises.push(
this.getTruncatedTree(
file.sha,
progress,
truncatedTree,
elementPath,
count,
depth + 1
)
);
}
}
await Promise.all(promises);
} else {
try {
const data = await this.getGHTree(sha, {
recursive: true,
});
this.tree2Tree(data.tree, truncatedTree, parentPath);
if (data.truncated) {
// TODO: TRUNCATED
}
} catch (error) {
span.recordException(error as Error);
const promises: Promise<any>[] = [];
const parentPaths: string[] = [];
for (const file of data.tree) {
if (file.type == "tree" && file.path && file.sha) {
const elementPath = path.join(parentPath, file.path);
parentPaths.push(elementPath);
promises.push(
this.getGHTree(file.sha, count, {
recursive: true,
callback: () => {
if (progress) {
progress("List file: " + count.file);
}
},
})
);
}
}
(await Promise.all(promises)).forEach((data, i) => {
output.push(...this.tree2Tree(data.tree, parentPaths[i]));
});
return output;
} finally {
span.end();
}
@@ -282,49 +236,25 @@ export default class GitHubStream extends GitHubBase {
size?: number;
url?: string;
}[],
partialTree: Tree = {},
parentPath: string = ""
) {
const span = trace.getTracer("ano-file").startSpan("GHStream.tree2Tree");
span.setAttribute("parentPath", parentPath);
try {
for (let elem of tree) {
let current = partialTree;
if (!elem.path) continue;
const paths = path.join(parentPath, elem.path).split("/");
// if elem is a folder iterate on all folders if it is a file stop before the filename
const end = elem.type == "tree" ? paths.length : paths.length - 1;
for (let i = 0; i < end; i++) {
let p = paths[i];
if (p[0] == "$") {
p = "\\" + p;
}
if (!current[p]) {
current[p] = {};
}
current = current[p] as Tree;
return tree.map((elem) => {
const fullPath = path.join(parentPath, elem.path || "");
let pathFile = dirname(fullPath);
if (pathFile === ".") {
pathFile = "";
}
// if elem is a file add the file size in the file list
if (elem.type == "blob") {
if (Object.keys(current).length > config.MAX_FILE_FOLDER) {
// TODO: TRUNCATED
continue;
}
let p = paths[end];
if (p[0] == "$") {
p = "\\" + p;
}
current[p] = {
size: elem.size || 0, // size in bit
sha: elem.sha || "",
};
}
}
return partialTree;
return new FileModel({
name: basename(fullPath),
path: pathFile,
repoId: this.data.repoId,
size: elem.size,
sha: elem.sha,
});
});
} finally {
span.end();
}

View File

@@ -1,10 +1,10 @@
import { Readable } from "stream";
import AnonymizedFile from "../AnonymizedFile";
import { Tree } from "../types";
import GitHubDownload from "./GitHubDownload";
import GitHubStream from "./GitHubStream";
import Zip from "./Zip";
import { IFile } from "../model/files/files.types";
export type Source = GitHubDownload | GitHubStream | Zip;
@@ -20,5 +20,5 @@ export interface SourceBase {
/**
* Get all the files from a specific source
*/
getFiles(progress?: (status: string) => void): Promise<Tree>;
getFiles(progress?: (status: string) => void): Promise<IFile[]>;
}

View File

@@ -1,4 +1,3 @@
import { Tree } from "../types";
import config from "../../config";
import * as fs from "fs";
import { Extract } from "unzip-stream";
@@ -10,6 +9,8 @@ import { promisify } from "util";
import { lookup } from "mime-types";
import { trace } from "@opentelemetry/api";
import StorageBase, { FILE_TYPE } from "./Storage";
import FileModel from "../model/files/files.model";
import { IFile } from "../model/files/files.types";
export default class FileSystem extends StorageBase {
type = "FileSystem";
@@ -138,23 +139,25 @@ export default class FileSystem extends StorageBase {
opt: {
onEntry?: (file: { path: string; size: number }) => void;
} = {}
): Promise<Tree> {
): Promise<IFile[]> {
return trace
.getTracer("ano-file")
.startActiveSpan("fs.listFiles", async (span) => {
span.setAttribute("path", dir);
const fullPath = join(config.FOLDER, this.repoPath(repoId), dir);
let files = await fs.promises.readdir(fullPath);
const output: Tree = {};
const output2: IFile[] = [];
for (let file of files) {
let filePath = join(fullPath, file);
try {
const stats = await fs.promises.stat(filePath);
if (file[0] == "$") {
file = "\\" + file;
}
if (stats.isDirectory()) {
output[file] = await this.listFiles(repoId, join(dir, file), opt);
output2.push(
new FileModel({ name: file, path: dir, repoID: repoId })
);
output2.push(
...(await this.listFiles(repoId, join(dir, file), opt))
);
} else if (stats.isFile()) {
if (opt.onEntry) {
opt.onEntry({
@@ -162,14 +165,22 @@ export default class FileSystem extends StorageBase {
size: stats.size,
});
}
output[file] = { size: stats.size, sha: stats.ino.toString() };
output2.push(
new FileModel({
name: file,
path: dir,
repoID: repoId,
size: stats.size,
sha: stats.ino.toString(),
})
);
}
} catch (error) {
span.recordException(error as Error);
}
}
span.end();
return output;
return output2;
});
}

View File

@@ -14,9 +14,10 @@ import { lookup } from "mime-types";
import * as archiver from "archiver";
import { trace } from "@opentelemetry/api";
import { dirname, basename, join } from "path";
import { Tree, TreeFile } from "../types";
import AnonymousError from "../AnonymousError";
import StorageBase, { FILE_TYPE } from "./Storage";
import { IFile } from "../model/files/files.types";
import FileModel from "../model/files/files.model";
export default class S3Storage extends StorageBase {
type = "AWS";
@@ -245,13 +246,13 @@ export default class S3Storage extends StorageBase {
}
/** @override */
async listFiles(repoId: string, dir: string = ""): Promise<Tree> {
async listFiles(repoId: string, dir: string = ""): Promise<IFile[]> {
const span = trace.getTracer("ano-file").startSpan("s3.listFiles");
span.setAttribute("path", dir);
try {
if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
if (dir && dir[dir.length - 1] != "/") dir = dir + "/";
const out: Tree = {};
const out: IFile[] = [];
let req: ListObjectsV2CommandOutput;
let nextContinuationToken: string | undefined;
do {
@@ -267,22 +268,15 @@ export default class S3Storage extends StorageBase {
for (const f of req.Contents) {
if (!f.Key) continue;
f.Key = f.Key.replace(join(this.repoPath(repoId), dir), "");
const paths = f.Key.split("/");
let current: Tree = out;
for (let i = 0; i < paths.length - 1; i++) {
let p = paths[i];
if (!p) continue;
if (!(current[p] as Tree)) {
current[p] = {} as Tree;
}
current = current[p] as Tree;
}
if (f.ETag) {
const fileInfo: TreeFile = { size: f.Size || 0, sha: f.ETag };
const fileName = paths[paths.length - 1];
if (fileName) current[fileName] = fileInfo;
}
out.push(
new FileModel({
name: basename(f.Key),
path: dirname(f.Key),
repoID: repoId,
size: f.Size,
sha: f.ETag,
})
);
}
} while (req && req.Contents && req.IsTruncated);
return out;

View File

@@ -3,9 +3,9 @@ import { Transform, Readable } from "stream";
import * as archiver from "archiver";
import { Response } from "express";
import { Tree } from "../types";
import S3Storage from "./S3";
import FileSystem from "./FileSystem";
import { IFile } from "../model/files/files.types";
export type Storage = S3Storage | FileSystem;
@@ -62,7 +62,7 @@ export default abstract class StorageBase {
* List the files from dir
* @param dir
*/
abstract listFiles(repoId: string, dir: string): Promise<Tree>;
abstract listFiles(repoId: string, dir: string): Promise<IFile[]>;
/**
* Extract the content of tar to dir

View File

@@ -19,14 +19,3 @@ export enum RepositoryStatus {
export type ConferenceStatus = "ready" | "expired" | "removed";
export type SourceStatus = "available" | "unavailable";
export type TreeElement = Tree | TreeFile;
export interface Tree {
[key: string]: TreeElement;
}
export interface TreeFile {
sha: string;
size: number;
}

View File

@@ -277,7 +277,6 @@ router.get("/conferences", async (req, res) => {
],
};
}
res.json({
query: query,
page,

View File

@@ -26,7 +26,6 @@ router.get(
const repo = await getRepo(req, res, {
nocheck: false,
includeFiles: false,
});
if (!repo) return;

View File

@@ -131,7 +131,6 @@ router.post(
try {
const repo = await getRepo(req, res, {
nocheck: true,
includeFiles: false,
});
if (!repo) return;
@@ -158,7 +157,6 @@ router.delete(
async (req: express.Request, res: express.Response) => {
const repo = await getRepo(req, res, {
nocheck: true,
includeFiles: false,
});
if (!repo) return;
// if (repo.status == "removing") return res.json({ status: repo.status });
@@ -271,7 +269,6 @@ router.get("/:repoId/", async (req: express.Request, res: express.Response) => {
try {
const repo = await getRepo(req, res, {
nocheck: true,
includeFiles: false,
});
if (!repo) return;
@@ -364,7 +361,6 @@ router.post(
try {
const repo = await getRepo(req, res, {
nocheck: true,
includeFiles: false,
});
if (!repo) return;
const user = await getUser(req);

View File

@@ -63,10 +63,16 @@ router.get(
"/:repoId/files",
async (req: express.Request, res: express.Response) => {
res.header("Cache-Control", "no-cache");
const repo = await getRepo(req, res, { includeFiles: true });
const repo = await getRepo(req, res);
if (!repo) return;
try {
res.json(await repo.anonymizedFiles({ includeSha: false }));
res.json(
await repo.anonymizedFiles({
includeSha: false,
recursive: false,
path: req.query.path as string,
})
);
} catch (error) {
handleError(error, res, req);
}
@@ -80,7 +86,6 @@ router.get(
res.header("Cache-Control", "no-cache");
const repo = await getRepo(req, res, {
nocheck: true,
includeFiles: false,
});
if (!repo) return;
let redirectURL = null;

View File

@@ -38,15 +38,12 @@ export async function getPullRequest(
export async function getRepo(
req: express.Request,
res: express.Response,
opt: { nocheck?: boolean; includeFiles?: boolean } = {
opt: { nocheck?: boolean } = {
nocheck: false,
includeFiles: false,
}
) {
try {
const repo = await db.getRepository(req.params.repoId, {
includeFiles: opt.includeFiles === true,
});
const repo = await db.getRepository(req.params.repoId);
if (opt.nocheck == true) {
} else {
// redirect if the repository is expired

View File

@@ -3,7 +3,6 @@ import { getRepo, handleError } from "./route-utils";
import * as path from "path";
import AnonymizedFile from "../../core/AnonymizedFile";
import AnonymousError from "../../core/AnonymousError";
import { Tree, TreeElement } from "../../core/types";
import * as marked from "marked";
import { streamToString } from "../../core/anonymize-utils";
@@ -35,55 +34,45 @@ async function webView(req: express.Request, res: express.Response) {
});
}
if (repo.options.pageSource?.branch != repo.model.source.branch) {
if (repo.options.pageSource.branch != repo.model.source.branch) {
throw new AnonymousError("page_not_supported_on_different_branch", {
httpStatus: 400,
object: repo,
});
}
let requestPath = path.join(
repo.options.pageSource?.path,
req.path.substring(
req.path.indexOf(req.params.repoId) + req.params.repoId.length
)
);
let wRoot = repo.options.pageSource.path;
if (wRoot.at(0) == "/") {
wRoot = wRoot.substring(1);
}
const filePath = req.path.split(req.params.repoId)[1];
let requestPath = path.join(wRoot, filePath);
let f = new AnonymizedFile({
repository: repo,
anonymizedPath: requestPath,
});
if (requestPath[requestPath.length - 1] == "/") {
// find index file
const paths = f.anonymizedPath.trim().split("/");
let currentAnonymized: TreeElement = await repo.anonymizedFiles({
includeSha: true,
if (
requestPath.at(-1) == "/" &&
req.headers.accept?.includes("text/html")
) {
// look for index file
const candidates = await repo.files({
recursive: false,
path: await f.originalPath(),
});
for (let i = 0; i < paths.length; i++) {
const fileName = paths[i];
if (fileName == "") {
continue;
}
if (!(currentAnonymized as Tree)[fileName]) {
throw new AnonymousError("file_not_found", {
object: repo,
httpStatus: 404,
});
}
currentAnonymized = (currentAnonymized as Tree)[fileName];
}
let best_match = null;
let bestMatch = null;
indexSelector: for (const p of indexPriority) {
for (let filename in currentAnonymized) {
if (filename.toLowerCase() == p) {
best_match = filename;
for (const file of candidates) {
if (file.name.toLowerCase() == p) {
bestMatch = file;
break indexSelector;
}
}
}
if (best_match) {
requestPath = path.join(requestPath, best_match);
if (bestMatch) {
requestPath = path.join(bestMatch.path, bestMatch.name);
f = new AnonymizedFile({
repository: repo,
anonymizedPath: requestPath,