mirror of
https://github.com/tdurieux/anonymous_github.git
synced 2026-02-12 18:32:44 +00:00
feat: flatten file tree for better performance
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
<div class="container-fluid h-100">
|
||||
<div class="row h-100">
|
||||
<div class="leftCol shadow p-1 overflow-auto" ng-show="files">
|
||||
<div class="leftCol shadow p-1 overflow-auto" ng-show="files.length">
|
||||
<tree class="files" file="files"></tree>
|
||||
<div class="bottom column">
|
||||
<div
|
||||
|
||||
@@ -270,30 +270,37 @@ angular
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Turn the flat file list into the nested structure used to render the tree.
 *
 * Each input entry carries `name` and `path` (the parent directory, "" for the
 * root). Entries with a non-null `size` are files and become
 * `{ name, size, sha }`; all other entries are directories and become
 * `{ name, child: [] }`.
 *
 * Unlike a plain single pass, this does not require parents to be listed
 * before their children: missing ancestor directories are created on demand.
 * An entry that appears more than once (e.g. the same folder fetched twice)
 * is only placed in the tree once.
 *
 * @param arr flat list of file/directory entries (null/undefined is treated as empty)
 * @returns the entries of the root directory, with directories nested via `child`
 */
const toArray = function (arr) {
  const output = [];
  // Children array of every directory seen so far, keyed by full path ("" = root).
  const childrenByPath = new Map([["", output]]);
  // Full paths of files already placed in the tree, used to drop duplicates.
  const placedFiles = new Set();

  // Return the children array of `dirPath`, creating the directory and any
  // missing ancestors first.
  const childrenOf = (dirPath) => {
    const known = childrenByPath.get(dirPath);
    if (known) {
      return known;
    }
    const cut = dirPath.lastIndexOf("/");
    const parentPath = cut === -1 ? "" : dirPath.substring(0, cut);
    const dir = {
      name: dirPath.substring(cut + 1),
      child: [],
    };
    childrenOf(parentPath).push(dir);
    childrenByPath.set(dirPath, dir.child);
    return dir.child;
  };

  for (const file of arr || []) {
    const parentPath = file.path || "";
    // Root entries have an empty parent path, so no leading "/" is produced.
    const fPath = parentPath ? `${parentPath}/${file.name}` : file.name;
    if (file.size != null) {
      // it is a file
      if (placedFiles.has(fPath)) {
        continue;
      }
      placedFiles.add(fPath);
      childrenOf(parentPath).push({
        name: file.name,
        size: file.size,
        sha: file.sha,
      });
    } else {
      // it is a directory: registering it is enough, children attach later
      childrenOf(fPath);
    }
  }
  return output;
};
|
||||
|
||||
const sortFiles = (f1, f2) => {
|
||||
const f1d = isDir(f1.child);
|
||||
const f2d = isDir(f2.child);
|
||||
const f1d = !!f1.child;
|
||||
const f2d = !!f2.child;
|
||||
if (f1d && f2d) {
|
||||
return f1.name - f2.name;
|
||||
}
|
||||
@@ -307,22 +314,24 @@ angular
|
||||
};
|
||||
|
||||
function generate(current, parentPath) {
|
||||
const afiles = toArray(current).sort(sortFiles);
|
||||
if (!current) return "";
|
||||
current = current.sort(sortFiles);
|
||||
const afiles = current;
|
||||
let output = "<ul>";
|
||||
for (let f of afiles) {
|
||||
let dir = isDir(f.child);
|
||||
let dir = !!f.child;
|
||||
let name = f.name;
|
||||
let size = f.size || 0;
|
||||
if (dir) {
|
||||
let test = name;
|
||||
current = toArray(f.child);
|
||||
while (current.length == 1) {
|
||||
current = f.child;
|
||||
while (current && current.length == 1) {
|
||||
test += "/" + current[0].name;
|
||||
size = current[0].size;
|
||||
current = toArray(current[0].child);
|
||||
current = current[0].child;
|
||||
}
|
||||
name = test;
|
||||
if (current.length == 0) {
|
||||
if (size > 0) {
|
||||
dir = false;
|
||||
}
|
||||
}
|
||||
@@ -332,15 +341,27 @@ angular
|
||||
size = "";
|
||||
}
|
||||
const path = `${parentPath}/${name}`;
|
||||
output += `<li class="file ${
|
||||
dir ? "folder" : ""
|
||||
}" ng-class="{active: isActive('${path}'), open: opens['${path}']}" title="${size}">`;
|
||||
|
||||
const cssClasses = ["file"];
|
||||
if (dir) {
|
||||
cssClasses.push("folder");
|
||||
}
|
||||
if ($scope.opens[path]) {
|
||||
cssClasses.push("open");
|
||||
}
|
||||
if ($scope.isActive(path)) {
|
||||
cssClasses.push("active");
|
||||
}
|
||||
|
||||
output += `<li class="${cssClasses.join(
|
||||
" "
|
||||
)}" ng-class="{active: isActive('${path}'), open: opens['${path}']}" title="${size}">`;
|
||||
if (dir) {
|
||||
output += `<a ng-click="openFolder('${path}', $event)">${name}</a>`;
|
||||
} else {
|
||||
output += `<a href='/r/${$scope.repoId}${path}'>${name}</a>`;
|
||||
}
|
||||
if ($scope.opens[path]) {
|
||||
if ($scope.opens[path] && f.child && f.child.length > 1) {
|
||||
output += generate(f.child, parentPath + "/" + f.name);
|
||||
}
|
||||
// output += generate(f.child, parentPath + "/" + f.name);
|
||||
@@ -349,44 +370,36 @@ angular
|
||||
return output + "</ul>";
|
||||
}
|
||||
// Rebuild the whole tree: clear the element, nest and sort the flat list held
// in $scope.file, generate the markup and compile it into the element.
function display() {
  $element.html("");
  const tree = toArray($scope.file);
  tree.sort(sortFiles);
  const markup = generate(tree, "");
  $compile(markup)($scope, function (clone) {
    $element.append(clone);
  });
}
|
||||
|
||||
// Redraw the tree whenever the flat file list changes. The third argument
// (`true`) asks Angular to compare by value, so in-place pushes are detected.
const onFileListChanged = (newValue) => {
  if (newValue == null) {
    return;
  }
  if (newValue.length == 0) {
    return $element.html("Empty repository");
  }
  display();
};
$scope.$watch("file", onFileListChanged, true);
|
||||
|
||||
// True when `name` (a tree path with a leading "/") is the path currently
// selected in the route.
$scope.isActive = function (name) {
  const currentPath = $routeParams.path;
  const candidate = name.substring(1);
  return currentPath == candidate;
};
|
||||
|
||||
// Toggle the open state of `folder` (tree path with a leading "/"). When the
// clicked link has no sibling yet, the folder's entries are requested from the
// parent scope and the tree is redrawn.
$scope.openFolder = async function (folder, event) {
  const wasOpen = $scope.opens[folder];
  $scope.opens[folder] = !wasOpen;

  const hasRenderedChildren = event.srcElement.nextSibling != null;
  if (hasRenderedChildren) {
    return;
  }

  const relativePath = folder.substring(1);
  await $scope.$parent.getFiles(relativePath);
  display();
};
|
||||
const isFile = function (child) {
|
||||
return child == null || child.size != null;
|
||||
};
|
||||
const isDir = function (child) {
|
||||
return !isFile(child);
|
||||
};
|
||||
},
|
||||
],
|
||||
};
|
||||
@@ -1177,7 +1190,7 @@ angular
|
||||
}
|
||||
await $scope.getBranches();
|
||||
} catch (error) {
|
||||
console.log("here", error);
|
||||
console.log(error);
|
||||
if (error.data) {
|
||||
$translate("ERRORS." + error.data.error).then((translation) => {
|
||||
const toast = {
|
||||
@@ -1474,6 +1487,7 @@ angular
|
||||
"$sce",
|
||||
"PDFViewerService",
|
||||
function ($scope, $http, $location, $routeParams, $sce, PDFViewerService) {
|
||||
$scope.files = [];
|
||||
const extensionModes = {
|
||||
yml: "yaml",
|
||||
txt: "text",
|
||||
@@ -1526,6 +1540,9 @@ angular
|
||||
});
|
||||
|
||||
function selectFile() {
|
||||
if ($scope.paths[0] != "") {
|
||||
return;
|
||||
}
|
||||
const readmePriority = [
|
||||
"readme.md",
|
||||
"readme.txt",
|
||||
@@ -1533,21 +1550,10 @@ angular
|
||||
"readme.1st",
|
||||
"readme",
|
||||
];
|
||||
// find current folder
|
||||
let currentFolder = $scope.files;
|
||||
for (const p of $scope.paths) {
|
||||
if (currentFolder[p]) {
|
||||
currentFolder = currentFolder[p];
|
||||
}
|
||||
}
|
||||
if (currentFolder.size && Number.isInteger(currentFolder.size)) {
|
||||
// a file is already selected
|
||||
return;
|
||||
}
|
||||
const readmeCandidates = {};
|
||||
for (const file in currentFolder) {
|
||||
if (file.toLowerCase().indexOf("readme") > -1) {
|
||||
readmeCandidates[file.toLowerCase()] = file;
|
||||
for (const file of $scope.files) {
|
||||
if (file.name.toLowerCase().indexOf("readme") > -1) {
|
||||
readmeCandidates[file.name.toLowerCase()] = file.name;
|
||||
}
|
||||
}
|
||||
let best_match = null;
|
||||
@@ -1569,36 +1575,29 @@ angular
|
||||
$location.url(uri + readmeCandidates[best_match]);
|
||||
}
|
||||
}
|
||||
/**
 * Fetch the entries of one folder of the repository and append them to
 * `$scope.files`.
 *
 * @param path folder path relative to the repository root ("" for the root)
 * @returns the entries returned by the API, or undefined when the request
 *          failed (in which case the scope is switched to the error state and
 *          the file list is emptied)
 */
$scope.getFiles = async function (path) {
  try {
    // The path is user-visible data (folder names may contain "&", "#", "?",
    // spaces, ...), so it has to be encoded before going into the query string.
    const res = await $http.get(
      `/api/repo/${$scope.repoId}/files/?path=${encodeURIComponent(path)}`
    );
    $scope.files.push(...res.data);
    return res.data;
  } catch (err) {
    $scope.type = "error";
    // `err.data` is not guaranteed to exist (e.g. a failed connection or an
    // exception thrown above), so do not dereference it blindly.
    const detail = err && err.data && err.data.error;
    $scope.content =
      detail || (err && (err.statusText || err.message)) || "";
    $scope.files = [];
  }
};
|
||||
|
||||
// Look up the entry of the flat file list that corresponds to the current
// route: its name is the last path segment and its parent path is made of all
// the preceding segments. Returns undefined when nothing matches.
function getSelectedFile() {
  const segments = $scope.paths;
  const lastIndex = segments.length - 1;
  const wantedName = segments[lastIndex];
  const wantedDir = segments.slice(0, lastIndex).join("/");
  return $scope.files.find(
    (f) => f.name == wantedName && f.path == wantedDir
  );
}
|
||||
|
||||
async function getOptions(callback) {
|
||||
function getOptions(callback) {
|
||||
$http.get(`/api/repo/${$scope.repoId}/options`).then(
|
||||
(res) => {
|
||||
$scope.options = res.data;
|
||||
@@ -1835,8 +1834,13 @@ angular
|
||||
$scope.filePath = $routeParams.path || "";
|
||||
$scope.paths = $scope.filePath.split("/");
|
||||
|
||||
getOptions((options) => {
|
||||
getFiles(() => {
|
||||
getOptions(async (options) => {
|
||||
for (let i = 0; i < $scope.paths.length; i++) {
|
||||
const path = i > 0 ? $scope.paths.slice(0, i).join("/") : "";
|
||||
await $scope.getFiles(path);
|
||||
}
|
||||
$scope.$apply(() => {
|
||||
selectFile();
|
||||
updateContent();
|
||||
});
|
||||
});
|
||||
|
||||
2
public/script/bundle.min.js
vendored
2
public/script/bundle.min.js
vendored
File diff suppressed because one or more lines are too long
@@ -1,28 +1,27 @@
|
||||
import { join, basename } from "path";
|
||||
import { join, basename, dirname } from "path";
|
||||
import { Response } from "express";
|
||||
import { Readable } from "stream";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import { lookup } from "mime-types";
|
||||
import CacheableLookup from "cacheable-lookup";
|
||||
import got from "got";
|
||||
|
||||
import Repository from "./Repository";
|
||||
import { RepositoryStatus, Tree, TreeElement, TreeFile } from "./types";
|
||||
import { RepositoryStatus } from "./types";
|
||||
import config from "../config";
|
||||
import { anonymizePath, isTextFile } from "./anonymize-utils";
|
||||
import AnonymousError from "./AnonymousError";
|
||||
import { handleError } from "../server/routes/route-utils";
|
||||
import got from "got";
|
||||
import FileModel from "./model/files/files.model";
|
||||
import { IFile } from "./model/files/files.types";
|
||||
|
||||
/**
|
||||
* Represent a file in a anonymized repository
|
||||
*/
|
||||
export default class AnonymizedFile {
|
||||
private _originalPath: string | undefined;
|
||||
private fileSize?: number;
|
||||
|
||||
repository: Repository;
|
||||
anonymizedPath: string;
|
||||
_sha?: string;
|
||||
|
||||
private _file?: IFile | null;
|
||||
|
||||
constructor(data: { repository: Repository; anonymizedPath: string }) {
|
||||
this.repository = data.repository;
|
||||
@@ -35,16 +34,87 @@ export default class AnonymizedFile {
|
||||
}
|
||||
|
||||
/**
 * Get the sha of the file, resolving the file record first when it has not
 * been loaded yet. Double quotes are stripped from the stored value.
 *
 * @returns the sha without double quotes, or undefined when the record has none
 */
async sha() {
  const tracer = trace.getTracer("ano-file");
  return tracer.startActiveSpan("AnnoFile.sha", async (span) => {
    try {
      span.setAttribute("anonymizedPath", this.anonymizedPath);
      if (!this._file) {
        this._file = await this.getFileInfo();
      }
      const rawSha = this._file.sha;
      return rawSha?.replace(/"/g, "");
    } finally {
      span.end();
    }
  });
}
|
||||
|
||||
/**
 * Resolve the stored file record that corresponds to the anonymized path of
 * this file and cache it on the instance.
 *
 * When the anonymized path contains no anonymization mask, the record is
 * looked up directly by directory and name. Otherwise the masked parts are
 * turned into wildcards to pre-select candidates, and the first candidate
 * whose anonymized path equals `anonymizedPath` is returned.
 *
 * @returns the matching file record
 * @throws AnonymousError `file_not_found` (404) when no record matches
 */
async getFileInfo(): Promise<IFile> {
  const span = trace.getTracer("ano-file").startSpan("AnnoFile.getFileInfo");
  span.setAttribute("repoId", this.repository.repoId);
  span.setAttribute("file", this.anonymizedPath);

  // Literal pieces of the path must match themselves: without escaping, a name
  // such as "c++" or "a (copy).txt" is an invalid or wrong regular expression.
  const escapeRegExp = (text: string): string =>
    text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // Stands for whatever original text was replaced by a mask.
  const wildcard = "[^/]+";

  try {
    if (this._file) return this._file;
    let fileDir = dirname(this.anonymizedPath);
    if (fileDir == ".") fileDir = "";
    const filename = basename(this.anonymizedPath);

    if (!this.anonymizedPath.includes(config.ANONYMIZATION_MASK)) {
      // nothing was anonymized in the path: exact lookup
      const res = await FileModel.findOne({
        repoId: this.repository.repoId,
        path: fileDir,
        name: filename,
      });
      if (res) {
        this._file = res;
        return res;
      }
      throw new AnonymousError("file_not_found", {
        object: this,
        httpStatus: 404,
      });
    }

    // A directory segment containing a mask can be any name; the others are
    // matched literally.
    const pathQuery = fileDir
      .split("/")
      .map((p) =>
        p.includes(config.ANONYMIZATION_MASK) ? wildcard : escapeRegExp(p)
      )
      .join("/");

    // Every mask occurrence in the file name (with its optional "-<n>" suffix)
    // becomes a wildcard; the text in between is matched literally.
    const maskPattern = new RegExp(
      escapeRegExp(config.ANONYMIZATION_MASK) + "(?:-[0-9]+)?",
      "g"
    );
    const nameQuery = filename
      .split(maskPattern)
      .map(escapeRegExp)
      .join(wildcard);

    // The patterns are intentionally left unanchored: they only pre-select
    // candidates, the exact check is done below with anonymizePath.
    const candidates = await FileModel.find({
      repoId: this.repository.repoId,
      path: new RegExp(pathQuery),
      name: new RegExp(nameQuery),
    }).exec();

    for (const candidate of candidates) {
      const candidatePath = join(candidate.path, candidate.name);
      if (
        anonymizePath(candidatePath, this.repository.options.terms || []) ==
        this.anonymizedPath
      ) {
        this._file = candidate;
        return candidate;
      }
    }
    throw new AnonymousError("file_not_found", {
      object: this,
      httpStatus: 404,
    });
  } catch (error) {
    span.recordException(error as Error);
    throw error;
  } finally {
    span.end();
  }
}
|
||||
|
||||
/**
|
||||
@@ -53,102 +123,24 @@ export default class AnonymizedFile {
|
||||
* @returns the origin relative path of the file
|
||||
*/
|
||||
async originalPath(): Promise<string> {
  const tracer = trace.getTracer("ano-file");
  const span = tracer.startSpan("AnnoFile.originalPath");
  span.setAttribute("repoId", this.repository.repoId);
  span.setAttribute("file", this.anonymizedPath);
  try {
    span.setAttribute("anonymizedPath", this.anonymizedPath);

    // an empty anonymized path cannot be resolved
    const hasPath = Boolean(this.anonymizedPath);
    if (!hasPath) {
      throw new AnonymousError("path_not_specified", {
        object: this,
        httpStatus: 400,
      });
    }

    // load (and cache) the file record on first use
    if (!this._file) {
      this._file = await this.getFileInfo();
    }
    const { path: directory, name } = this._file;
    return join(directory, name);
  } finally {
    span.end();
  }
}
|
||||
extension() {
|
||||
const filename = basename(this.anonymizedPath);
|
||||
@@ -194,7 +186,7 @@ export default class AnonymizedFile {
|
||||
await this.originalPath();
|
||||
}
|
||||
span.addEvent("filePath", { originalPath: this.filePath });
|
||||
if (this.fileSize && this.fileSize > config.MAX_FILE_SIZE) {
|
||||
if (this._file?.size && this._file?.size > config.MAX_FILE_SIZE) {
|
||||
throw new AnonymousError("file_too_big", {
|
||||
object: this,
|
||||
httpStatus: 403,
|
||||
@@ -229,16 +221,16 @@ export default class AnonymizedFile {
|
||||
});
|
||||
}
|
||||
|
||||
const cacheableLookup = new CacheableLookup();
|
||||
const hostName = new URL(config.STREAMER_ENTRYPOINT).hostname;
|
||||
const ipHost = await cacheableLookup.lookupAsync(hostName);
|
||||
// const cacheableLookup = new CacheableLookup();
|
||||
// const hostName = new URL(config.STREAMER_ENTRYPOINT).hostname;
|
||||
// const ipHost = await cacheableLookup.lookupAsync(hostName);
|
||||
|
||||
// use the streamer service
|
||||
return got.stream(join(config.STREAMER_ENTRYPOINT, "api"), {
|
||||
method: "POST",
|
||||
lookup: cacheableLookup.lookup,
|
||||
host: ipHost.address,
|
||||
dnsCache: cacheableLookup,
|
||||
// lookup: cacheableLookup.lookup,
|
||||
// host: ipHost.address,
|
||||
// dnsCache: cacheableLookup,
|
||||
json: {
|
||||
token: await this.repository.getToken(),
|
||||
repoFullName: this.repository.model.source.repositoryName,
|
||||
@@ -253,7 +245,7 @@ export default class AnonymizedFile {
|
||||
}
|
||||
|
||||
get filePath() {
|
||||
if (!this._originalPath) {
|
||||
if (!this._file) {
|
||||
if (this.anonymizedPath.includes(config.ANONYMIZATION_MASK)) {
|
||||
throw new AnonymousError("path_not_defined", {
|
||||
object: this,
|
||||
@@ -263,9 +255,13 @@ export default class AnonymizedFile {
|
||||
return this.anonymizedPath;
|
||||
}
|
||||
|
||||
return this._originalPath;
|
||||
return join(this._file.path, this._file.name);
|
||||
}
|
||||
|
||||
// cacheableLookup = new CacheableLookup({
|
||||
// maxTtl: 60,
|
||||
// });
|
||||
|
||||
async send(res: Response): Promise<void> {
|
||||
const anonymizer = this.repository.generateAnonymizeTransformer(
|
||||
this.anonymizedPath
|
||||
@@ -283,15 +279,15 @@ export default class AnonymizedFile {
|
||||
this.sha(),
|
||||
this.repository.getToken(),
|
||||
]);
|
||||
const cacheableLookup = new CacheableLookup();
|
||||
const hostName = new URL(config.STREAMER_ENTRYPOINT).hostname;
|
||||
const ipHost = await cacheableLookup.lookupAsync(hostName);
|
||||
// const hostName = new URL(config.STREAMER_ENTRYPOINT).hostname;
|
||||
// const ipHost = await this.cacheableLookup.lookupAsync(hostName);
|
||||
// console.timeLog("streamer"+ this.anonymizedPath, "got ip");
|
||||
got
|
||||
.stream(join(config.STREAMER_ENTRYPOINT, "api"), {
|
||||
method: "POST",
|
||||
lookup: cacheableLookup.lookup,
|
||||
host: ipHost.address,
|
||||
dnsCache: cacheableLookup,
|
||||
// lookup: this.cacheableLookup.lookup,
|
||||
// host: ipHost.address,
|
||||
// dnsCache: this.cacheableLookup,
|
||||
json: {
|
||||
sha,
|
||||
token,
|
||||
@@ -331,9 +327,9 @@ export default class AnonymizedFile {
|
||||
if (!mime && data.isText) {
|
||||
res.contentType("text/plain");
|
||||
}
|
||||
if (!data.wasAnonimized && this.fileSize) {
|
||||
if (!data.wasAnonimized && this._file?.size) {
|
||||
// the text files may be anonymized and therefore the size may be different
|
||||
res.header("Content-Length", this.fileSize.toString());
|
||||
res.header("Content-Length", this._file?.size.toString());
|
||||
}
|
||||
});
|
||||
const content = await this.content();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import storage from "./storage";
|
||||
import { RepositoryStatus, Tree, TreeElement, TreeFile } from "./types";
|
||||
import { RepositoryStatus } from "./types";
|
||||
import { Readable } from "stream";
|
||||
import * as sha1 from "crypto-js/sha1";
|
||||
import User from "./User";
|
||||
@@ -16,7 +16,6 @@ import ConferenceModel from "./model/conference/conferences.model";
|
||||
import AnonymousError from "./AnonymousError";
|
||||
import { downloadQueue } from "../queue";
|
||||
import { isConnected } from "../server/database";
|
||||
import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model";
|
||||
import {
|
||||
getRepositoryFromGitHub,
|
||||
GitHubRepository,
|
||||
@@ -25,9 +24,12 @@ import { trace } from "@opentelemetry/api";
|
||||
import { getToken } from "./GitHubUtils";
|
||||
import { FILE_TYPE } from "./storage/Storage";
|
||||
import config from "../config";
|
||||
|
||||
import FileModel from "./model/files/files.model";
|
||||
import { IFile } from "./model/files/files.types";
|
||||
import { join } from "path";
|
||||
import AnonymizedFile from "./AnonymizedFile";
|
||||
function anonymizeTreeRecursive(
|
||||
tree: TreeElement,
|
||||
tree: IFile[],
|
||||
terms: string[],
|
||||
opt: {
|
||||
/** Include the file sha in the response */
|
||||
@@ -35,24 +37,21 @@ function anonymizeTreeRecursive(
|
||||
} = {
|
||||
includeSha: false,
|
||||
}
|
||||
): TreeElement {
|
||||
if (typeof tree.size !== "object" && tree.sha !== undefined) {
|
||||
if (opt?.includeSha) return tree as TreeFile;
|
||||
): Partial<IFile>[] {
|
||||
return tree.map((file) => {
|
||||
return {
|
||||
size: tree.size,
|
||||
sha: sha1(tree.sha as string).toString(),
|
||||
} as TreeFile;
|
||||
}
|
||||
const output: Tree = {};
|
||||
Object.getOwnPropertyNames(tree).forEach((file) => {
|
||||
const anonymizedPath = anonymizePath(file, terms);
|
||||
output[anonymizedPath] = anonymizeTreeRecursive(
|
||||
(tree as Tree)[file],
|
||||
terms,
|
||||
opt
|
||||
);
|
||||
name: anonymizePath(file.name, terms),
|
||||
path: anonymizePath(file.path, terms),
|
||||
size: file.size,
|
||||
sha: opt.includeSha
|
||||
? file.sha
|
||||
: file.size
|
||||
? sha1(file.sha || "")
|
||||
.toString()
|
||||
.substring(0, 8)
|
||||
: undefined,
|
||||
};
|
||||
});
|
||||
return output;
|
||||
}
|
||||
|
||||
export default class Repository {
|
||||
@@ -124,13 +123,16 @@ export default class Repository {
|
||||
force?: boolean;
|
||||
/** Include the file sha in the response */
|
||||
includeSha: boolean;
|
||||
recursive?: boolean;
|
||||
path?: string;
|
||||
} = {
|
||||
force: false,
|
||||
includeSha: false,
|
||||
recursive: true,
|
||||
}
|
||||
): Promise<Tree> {
|
||||
): Promise<Partial<IFile>[]> {
|
||||
const terms = this._model.options.terms || [];
|
||||
return anonymizeTreeRecursive(await this.files(opt), terms, opt) as Tree;
|
||||
return anonymizeTreeRecursive(await this.files(opt), terms, opt);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -140,32 +142,81 @@ export default class Repository {
|
||||
* @returns The file tree
|
||||
*/
|
||||
async files(
|
||||
opt: { force?: boolean; progress?: (status: string) => void } = {
|
||||
opt: {
|
||||
recursive?: boolean;
|
||||
path?: string;
|
||||
force?: boolean;
|
||||
progress?: (status: string) => void;
|
||||
} = {
|
||||
recursive: true,
|
||||
force: false,
|
||||
}
|
||||
): Promise<Tree> {
|
||||
): Promise<IFile[]> {
|
||||
const span = trace.getTracer("ano-file").startSpan("Repository.files");
|
||||
span.setAttribute("repoId", this.repoId);
|
||||
try {
|
||||
if (!this._model.originalFiles && !opt.force) {
|
||||
const res = await AnonymizedRepositoryModel.findById(this._model._id, {
|
||||
originalFiles: 1,
|
||||
const hasFile = await FileModel.exists({ repoId: this.repoId }).exec();
|
||||
if (!hasFile || opt.force) {
|
||||
await FileModel.deleteMany({ repoId: this.repoId }).exec();
|
||||
const files = await this.source.getFiles(opt.progress);
|
||||
files.forEach((f) => (f.repoId = this.repoId));
|
||||
await FileModel.insertMany(files);
|
||||
|
||||
this._model.size = { storage: 0, file: 0 };
|
||||
await this.computeSize();
|
||||
}
|
||||
if (opt.path?.includes(config.ANONYMIZATION_MASK)) {
|
||||
const f = new AnonymizedFile({
|
||||
repository: this,
|
||||
anonymizedPath: opt.path,
|
||||
});
|
||||
if (!res) throw new AnonymousError("repository_not_found");
|
||||
this.model.originalFiles = res.originalFiles;
|
||||
opt.path = await f.originalPath();
|
||||
console.log(opt.path, f);
|
||||
// const anoPath = opt.path.split(config.ANONYMIZATION_MASK);
|
||||
// let beforePath = anoPath[0];
|
||||
// if (beforePath.endsWith("/")) {
|
||||
// beforePath = beforePath.substring(0, beforePath.length - 1);
|
||||
// }
|
||||
// let afterPath =
|
||||
// anoPath[1].indexOf("/") > -1
|
||||
// ? anoPath[1].substring(anoPath[1].indexOf("/") + 1)
|
||||
// : "";
|
||||
// const anoTerm = opt.path.substring(
|
||||
// opt.path.indexOf(config.ANONYMIZATION_MASK),
|
||||
// afterPath ? opt.path.indexOf(afterPath) - 1 : undefined
|
||||
// );
|
||||
|
||||
// const candidates = await FileModel.find({
|
||||
// repoId: this.repoId,
|
||||
// path: new RegExp(`^${beforePath}$`),
|
||||
// }).exec();
|
||||
// let found = false;
|
||||
// for (const candidate of candidates) {
|
||||
// const p = anonymizePath(
|
||||
// candidate.name,
|
||||
// this._model.options.terms || []
|
||||
// );
|
||||
// if (p == anoTerm) {
|
||||
// opt.path = join(beforePath, candidate.name, afterPath);
|
||||
// found = true;
|
||||
// }
|
||||
// }
|
||||
// if (found === false) {
|
||||
// throw new AnonymousError("path_not_found");
|
||||
// }
|
||||
}
|
||||
if (
|
||||
this._model.originalFiles &&
|
||||
Object.getOwnPropertyNames(this._model.originalFiles).length !== 0 &&
|
||||
!opt.force
|
||||
) {
|
||||
return this._model.originalFiles;
|
||||
|
||||
let pathQuery: string | RegExp | undefined = opt.path
|
||||
? new RegExp(`^${opt.path}`)
|
||||
: undefined;
|
||||
if (opt.recursive === false) {
|
||||
pathQuery = opt.path ? new RegExp(`^${opt.path}$`) : "";
|
||||
}
|
||||
const files = await this.source.getFiles(opt.progress);
|
||||
this._model.originalFiles = files;
|
||||
this._model.size = { storage: 0, file: 0 };
|
||||
await this.computeSize();
|
||||
return files;
|
||||
|
||||
return await FileModel.find({
|
||||
repoId: this.repoId,
|
||||
path: pathQuery,
|
||||
}).exec();
|
||||
} finally {
|
||||
span.end();
|
||||
}
|
||||
@@ -379,6 +430,7 @@ export default class Repository {
|
||||
span.end();
|
||||
return;
|
||||
}
|
||||
this.model.increment();
|
||||
await this.updateStatus(RepositoryStatus.DOWNLOAD);
|
||||
await this.files({
|
||||
force: false,
|
||||
@@ -461,12 +513,14 @@ export default class Repository {
|
||||
span.setAttribute("repoId", this.repoId);
|
||||
// remove attribute
|
||||
this._model.size = { storage: 0, file: 0 };
|
||||
this._model.originalFiles = undefined;
|
||||
if (status) {
|
||||
await this.updateStatus(status, statusMessage);
|
||||
}
|
||||
// remove cache
|
||||
await this.removeCache();
|
||||
// Remove the stored file records of this repository and its cache in
// parallel. The filter key must be `repoId`, the field name used by the file
// schema and by every other FileModel query; a differently spelled key does
// not restrict the deletion to this repository's records.
await Promise.all([
  FileModel.deleteMany({ repoId: this.repoId }).exec(),
  this.removeCache(),
]);
|
||||
console.log(`[RESET] ${this._model.repoId} has been reset`);
|
||||
span.end();
|
||||
}
|
||||
@@ -514,24 +568,24 @@ export default class Repository {
|
||||
if (this.status !== RepositoryStatus.READY)
|
||||
return { storage: 0, file: 0 };
|
||||
if (this._model.size.file) return this._model.size;
|
||||
function recursiveCount(files: Tree): { storage: number; file: number } {
|
||||
const out = { storage: 0, file: 0 };
|
||||
for (const name in files) {
|
||||
const file = files[name];
|
||||
if (file.size && parseInt(file.size.toString()) == file.size) {
|
||||
out.storage += file.size as number;
|
||||
out.file++;
|
||||
} else if (typeof file == "object") {
|
||||
const r = recursiveCount(file as Tree);
|
||||
out.storage += r.storage;
|
||||
out.file += r.file;
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
const files = await this.files();
|
||||
this._model.size = recursiveCount(files);
|
||||
const res = await FileModel.aggregate([
|
||||
{
|
||||
$match: {
|
||||
repoId: this.repoId,
|
||||
},
|
||||
},
|
||||
{
|
||||
$group: {
|
||||
_id: "$repoId",
|
||||
storage: { $sum: "$size" },
|
||||
file: { $sum: 1 },
|
||||
},
|
||||
},
|
||||
]);
|
||||
this._model.size = {
|
||||
storage: res[0]?.storage || 0,
|
||||
file: res[0]?.file || 0,
|
||||
};
|
||||
if (isConnected) {
|
||||
await this._model.save();
|
||||
}
|
||||
|
||||
@@ -34,7 +34,6 @@ const AnonymizedRepositorySchema = new Schema({
|
||||
type: Boolean,
|
||||
default: false,
|
||||
},
|
||||
originalFiles: Schema.Types.Mixed,
|
||||
options: {
|
||||
terms: [String],
|
||||
expirationMode: { type: String },
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { Document, Model } from "mongoose";
|
||||
import { RepositoryStatus, Tree } from "../../types";
|
||||
import { RepositoryStatus } from "../../types";
|
||||
|
||||
export interface IAnonymizedRepository {
|
||||
repoId: string;
|
||||
@@ -11,14 +11,13 @@ export interface IAnonymizedRepository {
|
||||
type: "GitHubDownload" | "GitHubStream" | "Zip";
|
||||
branch?: string;
|
||||
commit?: string;
|
||||
commitDate?: Date,
|
||||
commitDate?: Date;
|
||||
repositoryId?: string;
|
||||
repositoryName?: string;
|
||||
accessToken?: string;
|
||||
};
|
||||
owner: string;
|
||||
truckedFileList: boolean;
|
||||
originalFiles?: Tree;
|
||||
conference: string;
|
||||
options: {
|
||||
terms: string[];
|
||||
|
||||
8
src/core/model/files/files.model.ts
Normal file
8
src/core/model/files/files.model.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
import { model } from "mongoose";
|
||||
import { join } from "path";
|
||||
|
||||
import { IFileDocument, IFileModel } from "./files.types";
|
||||
import FileSchema from "./files.schema";
|
||||
|
||||
const FileModel = model<IFileDocument>("File", FileSchema) as IFileModel;
|
||||
export default FileModel;
|
||||
19
src/core/model/files/files.schema.ts
Normal file
19
src/core/model/files/files.schema.ts
Normal file
@@ -0,0 +1,19 @@
|
||||
import { Schema } from "mongoose";
|
||||
|
||||
/**
 * One entry (file or directory) of a repository's file list.
 *
 * - `name`: name of the entry
 * - `path`: path of the parent directory
 * - `repoId`: identifier of the repository the entry belongs to
 * - `sha`: sha of the entry
 * - `size`: size of the entry; NOTE(review): appears to be unset for
 *   directories, since consumers treat a null size as "directory" — confirm
 */
const FileSchema = new Schema({
  name: { type: String, index: true },
  path: { type: String, index: true },
  repoId: { type: String, index: true },
  sha: {
    type: String,
  },
  size: {
    type: Number,
  },
});

// File lookups filter on repoId together with path (and name), so a compound
// index serves them better than the three single-field indexes alone.
FileSchema.index({ repoId: 1, path: 1, name: 1 });

// Human-readable representation: "<path>/<name>".
FileSchema.methods.toString = function () {
  return `${this.path}/${this.name}`;
};
|
||||
|
||||
export default FileSchema;
|
||||
14
src/core/model/files/files.types.ts
Normal file
14
src/core/model/files/files.types.ts
Normal file
@@ -0,0 +1,14 @@
|
||||
import { Document, Model } from "mongoose";
|
||||
|
||||
/**
 * Plain shape of one entry in a repository's flat file list.
 * Field names must match the paths declared in FileSchema.
 */
export interface IFile {
|
||||
// Last path segment of the entry (file or directory name).
name: string;
|
||||
// Directory containing the entry, without the entry's own name.
path: string;
|
||||
// Identifier of the repository the entry belongs to.
repoId: string;
|
||||
// Content identifier supplied by the source/storage; optional.
sha?: string;
|
||||
// Size reported by the source/storage; optional.
// NOTE(review): directories appear to be stored without a size — confirm.
size?: number;
|
||||
}
|
||||
|
||||
/**
 * Mongoose document carrying the IFile fields, plus the `toString`
 * instance method declared on the schema.
 */
export interface IFileDocument extends IFile, Document {
|
||||
// Returns the textual form of the entry; `this` must be the document itself.
toString: (this: IFileDocument) => string;
|
||||
}
|
||||
// Model type for IFileDocument; currently adds no statics on top of Model.
export interface IFileModel extends Model<IFileDocument> {}
|
||||
@@ -1,8 +1,8 @@
|
||||
import { Readable } from "stream";
|
||||
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import { Tree } from "../types";
|
||||
import { SourceBase } from "./Source";
|
||||
import { IFile } from "../model/files/files.types";
|
||||
|
||||
export interface GitHubBaseData {
|
||||
getToken: () => string | Promise<string>;
|
||||
@@ -23,5 +23,5 @@ export default abstract class GitHubBase implements SourceBase {
|
||||
progress?: (status: string) => void
|
||||
): Promise<Readable>;
|
||||
|
||||
abstract getFiles(progress?: (status: string) => void): Promise<Tree>;
|
||||
abstract getFiles(progress?: (status: string) => void): Promise<IFile[]>;
|
||||
}
|
||||
|
||||
@@ -1,16 +1,17 @@
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import GitHubBase, { GitHubBaseData } from "./GitHubBase";
|
||||
import storage from "../storage";
|
||||
import { Tree } from "../types";
|
||||
import * as path from "path";
|
||||
import got from "got";
|
||||
import { basename, dirname } from "path";
|
||||
|
||||
import * as stream from "stream";
|
||||
import AnonymousError from "../AnonymousError";
|
||||
import config from "../../config";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import { FILE_TYPE } from "../storage/Storage";
|
||||
import { octokit } from "../GitHubUtils";
|
||||
import FileModel from "../model/files/files.model";
|
||||
import { IFile } from "../model/files/files.types";
|
||||
|
||||
export default class GitHubStream extends GitHubBase {
|
||||
type: "GitHubDownload" | "GitHubStream" | "Zip" = "GitHubStream";
|
||||
@@ -29,6 +30,7 @@ export default class GitHubStream extends GitHubBase {
|
||||
repo: this.data.repoName,
|
||||
file_sha: sha,
|
||||
});
|
||||
console.log("[GHStream] Downloading file", url);
|
||||
return got.stream(url, {
|
||||
headers: {
|
||||
"X-GitHub-Api-Version": "2022-11-28",
|
||||
@@ -132,61 +134,17 @@ export default class GitHubStream extends GitHubBase {
|
||||
const span = trace.getTracer("ano-file").startSpan("GHStream.getFiles");
|
||||
span.setAttribute("repoId", this.data.repoId);
|
||||
try {
|
||||
return this.getTree(this.data.commit, progress);
|
||||
return this.getTruncatedTree(this.data.commit, progress);
|
||||
} finally {
|
||||
span.end();
|
||||
}
|
||||
}
|
||||
|
||||
private async getTree(
|
||||
private async getGHTree(
|
||||
sha: string,
|
||||
progress?: (status: string) => void,
|
||||
truncatedTree: Tree = {},
|
||||
parentPath: string = "",
|
||||
count = {
|
||||
file: 0,
|
||||
request: 0,
|
||||
}
|
||||
count = { request: 0, file: 0 },
|
||||
opt = { recursive: true, callback: () => {} }
|
||||
) {
|
||||
const span = trace.getTracer("ano-file").startSpan("GHStream.getTree");
|
||||
span.setAttribute("sha", sha);
|
||||
|
||||
let ghRes: Awaited<ReturnType<typeof this.getGHTree>>;
|
||||
try {
|
||||
count.request++;
|
||||
ghRes = await this.getGHTree(sha, { recursive: true });
|
||||
} catch (error) {
|
||||
span.recordException(error as Error);
|
||||
if ((error as any).status == 409) {
|
||||
// cannot be empty otherwise it would try to download it again
|
||||
span.end();
|
||||
return { __: {} };
|
||||
} else {
|
||||
const err = new AnonymousError("repo_not_accessible", {
|
||||
httpStatus: (error as any).status,
|
||||
cause: error as Error,
|
||||
object: {
|
||||
tree_sha: sha,
|
||||
},
|
||||
});
|
||||
span.recordException(err);
|
||||
span.end();
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
const tree = this.tree2Tree(ghRes.tree, truncatedTree, parentPath);
|
||||
count.file += ghRes.tree.length;
|
||||
if (progress) {
|
||||
progress("List file: " + count.file);
|
||||
}
|
||||
if (ghRes.truncated) {
|
||||
await this.getTruncatedTree(sha, progress, tree, parentPath, count);
|
||||
}
|
||||
span.end();
|
||||
return tree;
|
||||
}
|
||||
|
||||
private async getGHTree(sha: string, opt = { recursive: true }) {
|
||||
const span = trace.getTracer("ano-file").startSpan("GHStream.getGHTree");
|
||||
span.setAttribute("sha", sha);
|
||||
try {
|
||||
@@ -195,8 +153,13 @@ export default class GitHubStream extends GitHubBase {
|
||||
owner: this.data.organization,
|
||||
repo: this.data.repoName,
|
||||
tree_sha: sha,
|
||||
recursive: opt.recursive ? "1" : undefined,
|
||||
recursive: opt.recursive === true ? "1" : undefined,
|
||||
});
|
||||
count.request++;
|
||||
count.file += ghRes.data.tree.length;
|
||||
if (opt.callback) {
|
||||
opt.callback();
|
||||
}
|
||||
return ghRes.data;
|
||||
} finally {
|
||||
span.end();
|
||||
@@ -206,68 +169,59 @@ export default class GitHubStream extends GitHubBase {
|
||||
private async getTruncatedTree(
|
||||
sha: string,
|
||||
progress?: (status: string) => void,
|
||||
truncatedTree: Tree = {},
|
||||
parentPath: string = "",
|
||||
count = {
|
||||
file: 0,
|
||||
request: 0,
|
||||
},
|
||||
depth = 0
|
||||
parentPath: string = ""
|
||||
) {
|
||||
const count = {
|
||||
request: 0,
|
||||
file: 0,
|
||||
};
|
||||
const span = trace
|
||||
.getTracer("ano-file")
|
||||
.startSpan("GHStream.getTruncatedTree");
|
||||
span.setAttribute("sha", sha);
|
||||
span.setAttribute("parentPath", parentPath);
|
||||
const output: IFile[] = [];
|
||||
try {
|
||||
count.request++;
|
||||
let data = null;
|
||||
|
||||
try {
|
||||
data = await this.getGHTree(sha, {
|
||||
data = await this.getGHTree(sha, count, {
|
||||
recursive: false,
|
||||
callback: () => {
|
||||
if (progress) {
|
||||
progress("List file: " + count.file);
|
||||
}
|
||||
},
|
||||
});
|
||||
this.tree2Tree(data.tree, truncatedTree, parentPath);
|
||||
output.push(...this.tree2Tree(data.tree, parentPath));
|
||||
} catch (error) {
|
||||
span.recordException(error as Error);
|
||||
return;
|
||||
throw new AnonymousError("files_not_found", {
|
||||
httpStatus: 404,
|
||||
object: this.data,
|
||||
cause: error as Error,
|
||||
});
|
||||
}
|
||||
|
||||
count.file += data.tree.length;
|
||||
if (progress) {
|
||||
progress("List file: " + count.file);
|
||||
}
|
||||
if (data.tree.length < 100 && count.request < 200) {
|
||||
const promises: Promise<any>[] = [];
|
||||
for (const file of data.tree) {
|
||||
if (file.type == "tree" && file.path && file.sha) {
|
||||
const elementPath = path.join(parentPath, file.path);
|
||||
promises.push(
|
||||
this.getTruncatedTree(
|
||||
file.sha,
|
||||
progress,
|
||||
truncatedTree,
|
||||
elementPath,
|
||||
count,
|
||||
depth + 1
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
await Promise.all(promises);
|
||||
} else {
|
||||
try {
|
||||
const data = await this.getGHTree(sha, {
|
||||
recursive: true,
|
||||
});
|
||||
this.tree2Tree(data.tree, truncatedTree, parentPath);
|
||||
if (data.truncated) {
|
||||
// TODO: TRUNCATED
|
||||
}
|
||||
} catch (error) {
|
||||
span.recordException(error as Error);
|
||||
const promises: Promise<any>[] = [];
|
||||
const parentPaths: string[] = [];
|
||||
for (const file of data.tree) {
|
||||
if (file.type == "tree" && file.path && file.sha) {
|
||||
const elementPath = path.join(parentPath, file.path);
|
||||
parentPaths.push(elementPath);
|
||||
promises.push(
|
||||
this.getGHTree(file.sha, count, {
|
||||
recursive: true,
|
||||
callback: () => {
|
||||
if (progress) {
|
||||
progress("List file: " + count.file);
|
||||
}
|
||||
},
|
||||
})
|
||||
);
|
||||
}
|
||||
}
|
||||
(await Promise.all(promises)).forEach((data, i) => {
|
||||
output.push(...this.tree2Tree(data.tree, parentPaths[i]));
|
||||
});
|
||||
return output;
|
||||
} finally {
|
||||
span.end();
|
||||
}
|
||||
@@ -282,49 +236,25 @@ export default class GitHubStream extends GitHubBase {
|
||||
size?: number;
|
||||
url?: string;
|
||||
}[],
|
||||
partialTree: Tree = {},
|
||||
parentPath: string = ""
|
||||
) {
|
||||
const span = trace.getTracer("ano-file").startSpan("GHStream.tree2Tree");
|
||||
span.setAttribute("parentPath", parentPath);
|
||||
try {
|
||||
for (let elem of tree) {
|
||||
let current = partialTree;
|
||||
|
||||
if (!elem.path) continue;
|
||||
|
||||
const paths = path.join(parentPath, elem.path).split("/");
|
||||
|
||||
// if elem is a folder iterate on all folders if it is a file stop before the filename
|
||||
const end = elem.type == "tree" ? paths.length : paths.length - 1;
|
||||
for (let i = 0; i < end; i++) {
|
||||
let p = paths[i];
|
||||
if (p[0] == "$") {
|
||||
p = "\\" + p;
|
||||
}
|
||||
if (!current[p]) {
|
||||
current[p] = {};
|
||||
}
|
||||
current = current[p] as Tree;
|
||||
return tree.map((elem) => {
|
||||
const fullPath = path.join(parentPath, elem.path || "");
|
||||
let pathFile = dirname(fullPath);
|
||||
if (pathFile === ".") {
|
||||
pathFile = "";
|
||||
}
|
||||
|
||||
// if elem is a file add the file size in the file list
|
||||
if (elem.type == "blob") {
|
||||
if (Object.keys(current).length > config.MAX_FILE_FOLDER) {
|
||||
// TODO: TRUNCATED
|
||||
continue;
|
||||
}
|
||||
let p = paths[end];
|
||||
if (p[0] == "$") {
|
||||
p = "\\" + p;
|
||||
}
|
||||
current[p] = {
|
||||
size: elem.size || 0, // size in bit
|
||||
sha: elem.sha || "",
|
||||
};
|
||||
}
|
||||
}
|
||||
return partialTree;
|
||||
return new FileModel({
|
||||
name: basename(fullPath),
|
||||
path: pathFile,
|
||||
repoId: this.data.repoId,
|
||||
size: elem.size,
|
||||
sha: elem.sha,
|
||||
});
|
||||
});
|
||||
} finally {
|
||||
span.end();
|
||||
}
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
import { Readable } from "stream";
|
||||
|
||||
import AnonymizedFile from "../AnonymizedFile";
|
||||
import { Tree } from "../types";
|
||||
import GitHubDownload from "./GitHubDownload";
|
||||
import GitHubStream from "./GitHubStream";
|
||||
import Zip from "./Zip";
|
||||
import { IFile } from "../model/files/files.types";
|
||||
|
||||
export type Source = GitHubDownload | GitHubStream | Zip;
|
||||
|
||||
@@ -20,5 +20,5 @@ export interface SourceBase {
|
||||
/**
|
||||
* Get all the files from a specific source
|
||||
*/
|
||||
getFiles(progress?: (status: string) => void): Promise<Tree>;
|
||||
getFiles(progress?: (status: string) => void): Promise<IFile[]>;
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import { Tree } from "../types";
|
||||
import config from "../../config";
|
||||
import * as fs from "fs";
|
||||
import { Extract } from "unzip-stream";
|
||||
@@ -10,6 +9,8 @@ import { promisify } from "util";
|
||||
import { lookup } from "mime-types";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import StorageBase, { FILE_TYPE } from "./Storage";
|
||||
import FileModel from "../model/files/files.model";
|
||||
import { IFile } from "../model/files/files.types";
|
||||
|
||||
export default class FileSystem extends StorageBase {
|
||||
type = "FileSystem";
|
||||
@@ -138,23 +139,25 @@ export default class FileSystem extends StorageBase {
|
||||
opt: {
|
||||
onEntry?: (file: { path: string; size: number }) => void;
|
||||
} = {}
|
||||
): Promise<Tree> {
|
||||
): Promise<IFile[]> {
|
||||
return trace
|
||||
.getTracer("ano-file")
|
||||
.startActiveSpan("fs.listFiles", async (span) => {
|
||||
span.setAttribute("path", dir);
|
||||
const fullPath = join(config.FOLDER, this.repoPath(repoId), dir);
|
||||
let files = await fs.promises.readdir(fullPath);
|
||||
const output: Tree = {};
|
||||
const output2: IFile[] = [];
|
||||
for (let file of files) {
|
||||
let filePath = join(fullPath, file);
|
||||
try {
|
||||
const stats = await fs.promises.stat(filePath);
|
||||
if (file[0] == "$") {
|
||||
file = "\\" + file;
|
||||
}
|
||||
if (stats.isDirectory()) {
|
||||
output[file] = await this.listFiles(repoId, join(dir, file), opt);
|
||||
// Record the directory itself (no size/sha) before descending into it.
// The key must be spelled `repoId`, matching FileSchema and IFile: Mongoose
// schemas are strict by default, so a `repoID` key would be silently dropped
// and the entry would be stored without its repository id.
output2.push(new FileModel({ name: file, path: dir, repoId }));
|
||||
output2.push(
|
||||
...(await this.listFiles(repoId, join(dir, file), opt))
|
||||
);
|
||||
} else if (stats.isFile()) {
|
||||
if (opt.onEntry) {
|
||||
opt.onEntry({
|
||||
@@ -162,14 +165,22 @@ export default class FileSystem extends StorageBase {
|
||||
size: stats.size,
|
||||
});
|
||||
}
|
||||
output[file] = { size: stats.size, sha: stats.ino.toString() };
|
||||
// Regular file: keep its size and use the inode number as a stand-in sha.
// The key must be spelled `repoId`, matching FileSchema and IFile: Mongoose
// schemas are strict by default, so a `repoID` key would be silently dropped
// and the entry would be stored without its repository id.
output2.push(
  new FileModel({
    name: file,
    path: dir,
    repoId,
    size: stats.size,
    sha: stats.ino.toString(),
  })
);
|
||||
}
|
||||
} catch (error) {
|
||||
span.recordException(error as Error);
|
||||
}
|
||||
}
|
||||
span.end();
|
||||
return output;
|
||||
return output2;
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -14,9 +14,10 @@ import { lookup } from "mime-types";
|
||||
import * as archiver from "archiver";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import { dirname, basename, join } from "path";
|
||||
import { Tree, TreeFile } from "../types";
|
||||
import AnonymousError from "../AnonymousError";
|
||||
import StorageBase, { FILE_TYPE } from "./Storage";
|
||||
import { IFile } from "../model/files/files.types";
|
||||
import FileModel from "../model/files/files.model";
|
||||
|
||||
export default class S3Storage extends StorageBase {
|
||||
type = "AWS";
|
||||
@@ -245,13 +246,13 @@ export default class S3Storage extends StorageBase {
|
||||
}
|
||||
|
||||
/** @override */
|
||||
async listFiles(repoId: string, dir: string = ""): Promise<Tree> {
|
||||
async listFiles(repoId: string, dir: string = ""): Promise<IFile[]> {
|
||||
const span = trace.getTracer("ano-file").startSpan("s3.listFiles");
|
||||
span.setAttribute("path", dir);
|
||||
try {
|
||||
if (!config.S3_BUCKET) throw new Error("S3_BUCKET not set");
|
||||
if (dir && dir[dir.length - 1] != "/") dir = dir + "/";
|
||||
const out: Tree = {};
|
||||
const out: IFile[] = [];
|
||||
let req: ListObjectsV2CommandOutput;
|
||||
let nextContinuationToken: string | undefined;
|
||||
do {
|
||||
@@ -267,22 +268,15 @@ export default class S3Storage extends StorageBase {
|
||||
for (const f of req.Contents) {
|
||||
if (!f.Key) continue;
|
||||
f.Key = f.Key.replace(join(this.repoPath(repoId), dir), "");
|
||||
const paths = f.Key.split("/");
|
||||
let current: Tree = out;
|
||||
for (let i = 0; i < paths.length - 1; i++) {
|
||||
let p = paths[i];
|
||||
if (!p) continue;
|
||||
if (!(current[p] as Tree)) {
|
||||
current[p] = {} as Tree;
|
||||
}
|
||||
current = current[p] as Tree;
|
||||
}
|
||||
|
||||
if (f.ETag) {
|
||||
const fileInfo: TreeFile = { size: f.Size || 0, sha: f.ETag };
|
||||
const fileName = paths[paths.length - 1];
|
||||
if (fileName) current[fileName] = fileInfo;
|
||||
}
|
||||
// dirname() returns "." for a key with no directory part; store "" instead
// so root-level entries use the same parent path as the GitHub source.
const parentDir = dirname(f.Key);
// The key must be spelled `repoId`, matching FileSchema and IFile: Mongoose
// schemas are strict by default, so a `repoID` key would be silently dropped
// and the entry would be stored without its repository id.
out.push(
  new FileModel({
    name: basename(f.Key),
    path: parentDir === "." ? "" : parentDir,
    repoId,
    size: f.Size,
    sha: f.ETag,
  })
);
|
||||
}
|
||||
} while (req && req.Contents && req.IsTruncated);
|
||||
return out;
|
||||
|
||||
@@ -3,9 +3,9 @@ import { Transform, Readable } from "stream";
|
||||
import * as archiver from "archiver";
|
||||
import { Response } from "express";
|
||||
|
||||
import { Tree } from "../types";
|
||||
import S3Storage from "./S3";
|
||||
import FileSystem from "./FileSystem";
|
||||
import { IFile } from "../model/files/files.types";
|
||||
|
||||
export type Storage = S3Storage | FileSystem;
|
||||
|
||||
@@ -62,7 +62,7 @@ export default abstract class StorageBase {
|
||||
* List the files from dir
|
||||
* @param dir
|
||||
*/
|
||||
abstract listFiles(repoId: string, dir: string): Promise<Tree>;
|
||||
abstract listFiles(repoId: string, dir: string): Promise<IFile[]>;
|
||||
|
||||
/**
|
||||
* Extract the content of tar to dir
|
||||
|
||||
@@ -19,14 +19,3 @@ export enum RepositoryStatus {
|
||||
export type ConferenceStatus = "ready" | "expired" | "removed";
|
||||
|
||||
export type SourceStatus = "available" | "unavailable";
|
||||
|
||||
export type TreeElement = Tree | TreeFile;
|
||||
|
||||
export interface Tree {
|
||||
[key: string]: TreeElement;
|
||||
}
|
||||
|
||||
export interface TreeFile {
|
||||
sha: string;
|
||||
size: number;
|
||||
}
|
||||
|
||||
@@ -277,7 +277,6 @@ router.get("/conferences", async (req, res) => {
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
res.json({
|
||||
query: query,
|
||||
page,
|
||||
|
||||
@@ -26,7 +26,6 @@ router.get(
|
||||
|
||||
const repo = await getRepo(req, res, {
|
||||
nocheck: false,
|
||||
includeFiles: false,
|
||||
});
|
||||
if (!repo) return;
|
||||
|
||||
|
||||
@@ -131,7 +131,6 @@ router.post(
|
||||
try {
|
||||
const repo = await getRepo(req, res, {
|
||||
nocheck: true,
|
||||
includeFiles: false,
|
||||
});
|
||||
if (!repo) return;
|
||||
|
||||
@@ -158,7 +157,6 @@ router.delete(
|
||||
async (req: express.Request, res: express.Response) => {
|
||||
const repo = await getRepo(req, res, {
|
||||
nocheck: true,
|
||||
includeFiles: false,
|
||||
});
|
||||
if (!repo) return;
|
||||
// if (repo.status == "removing") return res.json({ status: repo.status });
|
||||
@@ -271,7 +269,6 @@ router.get("/:repoId/", async (req: express.Request, res: express.Response) => {
|
||||
try {
|
||||
const repo = await getRepo(req, res, {
|
||||
nocheck: true,
|
||||
includeFiles: false,
|
||||
});
|
||||
if (!repo) return;
|
||||
|
||||
@@ -364,7 +361,6 @@ router.post(
|
||||
try {
|
||||
const repo = await getRepo(req, res, {
|
||||
nocheck: true,
|
||||
includeFiles: false,
|
||||
});
|
||||
if (!repo) return;
|
||||
const user = await getUser(req);
|
||||
|
||||
@@ -63,10 +63,16 @@ router.get(
|
||||
"/:repoId/files",
|
||||
async (req: express.Request, res: express.Response) => {
|
||||
res.header("Cache-Control", "no-cache");
|
||||
const repo = await getRepo(req, res, { includeFiles: true });
|
||||
const repo = await getRepo(req, res);
|
||||
if (!repo) return;
|
||||
try {
|
||||
res.json(await repo.anonymizedFiles({ includeSha: false }));
|
||||
res.json(
|
||||
await repo.anonymizedFiles({
|
||||
includeSha: false,
|
||||
recursive: false,
|
||||
path: req.query.path as string,
|
||||
})
|
||||
);
|
||||
} catch (error) {
|
||||
handleError(error, res, req);
|
||||
}
|
||||
@@ -80,7 +86,6 @@ router.get(
|
||||
res.header("Cache-Control", "no-cache");
|
||||
const repo = await getRepo(req, res, {
|
||||
nocheck: true,
|
||||
includeFiles: false,
|
||||
});
|
||||
if (!repo) return;
|
||||
let redirectURL = null;
|
||||
|
||||
@@ -38,15 +38,12 @@ export async function getPullRequest(
|
||||
export async function getRepo(
|
||||
req: express.Request,
|
||||
res: express.Response,
|
||||
opt: { nocheck?: boolean; includeFiles?: boolean } = {
|
||||
opt: { nocheck?: boolean } = {
|
||||
nocheck: false,
|
||||
includeFiles: false,
|
||||
}
|
||||
) {
|
||||
try {
|
||||
const repo = await db.getRepository(req.params.repoId, {
|
||||
includeFiles: opt.includeFiles === true,
|
||||
});
|
||||
const repo = await db.getRepository(req.params.repoId);
|
||||
if (opt.nocheck == true) {
|
||||
} else {
|
||||
// redirect if the repository is expired
|
||||
|
||||
@@ -3,7 +3,6 @@ import { getRepo, handleError } from "./route-utils";
|
||||
import * as path from "path";
|
||||
import AnonymizedFile from "../../core/AnonymizedFile";
|
||||
import AnonymousError from "../../core/AnonymousError";
|
||||
import { Tree, TreeElement } from "../../core/types";
|
||||
import * as marked from "marked";
|
||||
import { streamToString } from "../../core/anonymize-utils";
|
||||
|
||||
@@ -35,55 +34,45 @@ async function webView(req: express.Request, res: express.Response) {
|
||||
});
|
||||
}
|
||||
|
||||
if (repo.options.pageSource?.branch != repo.model.source.branch) {
|
||||
if (repo.options.pageSource.branch != repo.model.source.branch) {
|
||||
throw new AnonymousError("page_not_supported_on_different_branch", {
|
||||
httpStatus: 400,
|
||||
object: repo,
|
||||
});
|
||||
}
|
||||
|
||||
let requestPath = path.join(
|
||||
repo.options.pageSource?.path,
|
||||
req.path.substring(
|
||||
req.path.indexOf(req.params.repoId) + req.params.repoId.length
|
||||
)
|
||||
);
|
||||
let wRoot = repo.options.pageSource.path;
|
||||
if (wRoot.at(0) == "/") {
|
||||
wRoot = wRoot.substring(1);
|
||||
}
|
||||
const filePath = req.path.split(req.params.repoId)[1];
|
||||
let requestPath = path.join(wRoot, filePath);
|
||||
|
||||
let f = new AnonymizedFile({
|
||||
repository: repo,
|
||||
anonymizedPath: requestPath,
|
||||
});
|
||||
if (requestPath[requestPath.length - 1] == "/") {
|
||||
// find index file
|
||||
const paths = f.anonymizedPath.trim().split("/");
|
||||
|
||||
let currentAnonymized: TreeElement = await repo.anonymizedFiles({
|
||||
includeSha: true,
|
||||
if (
|
||||
requestPath.at(-1) == "/" &&
|
||||
req.headers.accept?.includes("text/html")
|
||||
) {
|
||||
// look for index file
|
||||
const candidates = await repo.files({
|
||||
recursive: false,
|
||||
path: await f.originalPath(),
|
||||
});
|
||||
for (let i = 0; i < paths.length; i++) {
|
||||
const fileName = paths[i];
|
||||
if (fileName == "") {
|
||||
continue;
|
||||
}
|
||||
if (!(currentAnonymized as Tree)[fileName]) {
|
||||
throw new AnonymousError("file_not_found", {
|
||||
object: repo,
|
||||
httpStatus: 404,
|
||||
});
|
||||
}
|
||||
currentAnonymized = (currentAnonymized as Tree)[fileName];
|
||||
}
|
||||
|
||||
let best_match = null;
|
||||
let bestMatch = null;
|
||||
indexSelector: for (const p of indexPriority) {
|
||||
for (let filename in currentAnonymized) {
|
||||
if (filename.toLowerCase() == p) {
|
||||
best_match = filename;
|
||||
for (const file of candidates) {
|
||||
if (file.name.toLowerCase() == p) {
|
||||
bestMatch = file;
|
||||
break indexSelector;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (best_match) {
|
||||
requestPath = path.join(requestPath, best_match);
|
||||
if (bestMatch) {
|
||||
requestPath = path.join(bestMatch.path, bestMatch.name);
|
||||
f = new AnonymizedFile({
|
||||
repository: repo,
|
||||
anonymizedPath: requestPath,
|
||||
|
||||
Reference in New Issue
Block a user