diff --git a/public/partials/status.htm b/public/partials/status.htm
index efe57d4..e3f9efe 100644
--- a/public/partials/status.htm
+++ b/public/partials/status.htm
@@ -21,10 +21,9 @@
>
{{repo.status | title}}
-
+ : {{repo.statusMessage | title}}
diff --git a/src/core/AnonymizedFile.ts b/src/core/AnonymizedFile.ts
index 885f0fd..e45dd5a 100644
--- a/src/core/AnonymizedFile.ts
+++ b/src/core/AnonymizedFile.ts
@@ -11,7 +11,6 @@ import { anonymizePath, isTextFile } from "./anonymize-utils";
import AnonymousError from "./AnonymousError";
import { handleError } from "../server/routes/route-utils";
import got from "got";
-import storage from "./storage";
/**
* Represent a file in a anonymized repository
diff --git a/src/core/Repository.ts b/src/core/Repository.ts
index f1f462b..2fd6893 100644
--- a/src/core/Repository.ts
+++ b/src/core/Repository.ts
@@ -16,10 +16,14 @@ import AnonymousError from "./AnonymousError";
import { downloadQueue } from "../queue";
import { isConnected } from "../server/database";
import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model";
-import { GitHubRepository } from "./source/GitHubRepository";
+import {
+ getRepositoryFromGitHub,
+ GitHubRepository,
+} from "./source/GitHubRepository";
import { trace } from "@opentelemetry/api";
import { getToken } from "./GitHubUtils";
import { FILE_TYPE } from "./storage/Storage";
+import config from "../config";
function anonymizeTreeRecursive(
tree: TreeElement,
@@ -129,7 +133,11 @@ export default class Repository {
* @param opt force to get an updated list of files
* @returns The file tree
*/
- async files(opt: { force?: boolean } = { force: false }): Promise {
+ async files(
+ opt: { force?: boolean; progress?: (status: string) => void } = {
+ force: false,
+ }
+ ): Promise {
const span = trace.getTracer("ano-file").startSpan("Repository.files");
span.setAttribute("repoId", this.repoId);
try {
@@ -147,7 +155,7 @@ export default class Repository {
) {
return this._model.originalFiles;
}
- const files = await this.source.getFiles();
+ const files = await this.source.getFiles(opt.progress);
this._model.originalFiles = files;
this._model.size = { storage: 0, file: 0 };
await this.computeSize();
@@ -306,6 +314,25 @@ export default class Repository {
`[UPDATE] ${this._model.repoId} will be updated to ${newCommit}`
);
+ const repository = await getRepositoryFromGitHub({
+ accessToken: await this.getToken(),
+ owner: this.source.data.organization,
+ repo: this.source.data.repoName,
+ });
+ if (repository.size) {
+ if (
+ repository.size > config.AUTO_DOWNLOAD_REPO_SIZE &&
+ this.model.source.type == "GitHubDownload"
+ ) {
+ this.model.source.type = "GitHubStream";
+ } else if (
+ repository.size < config.AUTO_DOWNLOAD_REPO_SIZE &&
+ this.model.source.type == "GitHubStream"
+ ) {
+ this.model.source.type = "GitHubDownload";
+ }
+ }
+
await this.resetSate(RepositoryStatus.PREPARING);
await downloadQueue.add(this.repoId, this, {
jobId: this.repoId,
@@ -320,16 +347,20 @@ export default class Repository {
*
* @returns void
*/
- async anonymize() {
+ async anonymize(progress?: (status: string) => void) {
const span = trace.getTracer("ano-file").startSpan("Repository.anonymize");
span.setAttribute("repoId", this.repoId);
if (this.status === RepositoryStatus.READY) {
span.end();
return;
}
- await this.updateStatus(RepositoryStatus.PREPARING);
- await this.files();
+ await this.updateStatus(RepositoryStatus.DOWNLOAD);
+ await this.files({
+ force: false,
+ progress,
+ });
await this.updateStatus(RepositoryStatus.READY);
+ await this.computeSize();
span.end();
}
diff --git a/src/core/source/GitHubDownload.ts b/src/core/source/GitHubDownload.ts
index e2abe64..4c39b5a 100644
--- a/src/core/source/GitHubDownload.ts
+++ b/src/core/source/GitHubDownload.ts
@@ -42,25 +42,16 @@ export default class GitHubDownload extends GitHubBase {
});
}
await storage.mk(this.data.repoId);
- let downloadProgress: { transferred: number } | undefined = undefined;
- let progressTimeout;
- let inDownload = true;
-
- async function updateProgress() {
- if (inDownload) {
- if (progress) {
- progress(downloadProgress?.transferred?.toString() || "");
- }
- progressTimeout = setTimeout(updateProgress, 1500);
- }
- }
- updateProgress();
-
try {
const downloadStream = got.stream(response.url);
- downloadStream.addListener("downloadProgress", async (p) => {
- downloadProgress = p;
- });
+ downloadStream.addListener(
+ "downloadProgress",
+ (p: { transferred?: number }) => {
+ if (progress && p.transferred) {
+ progress("Repository download: " + humanFileSize(p.transferred));
+ }
+ }
+ );
await storage.extractZip(
this.data.repoId,
"",
@@ -74,9 +65,6 @@ export default class GitHubDownload extends GitHubBase {
cause: error as Error,
object: this.data,
});
- } finally {
- inDownload = false;
- clearTimeout(progressTimeout);
}
} finally {
span.end();
@@ -116,6 +104,40 @@ export default class GitHubDownload extends GitHubBase {
if ((await storage.exists(this.data.repoId)) === FILE_TYPE.NOT_FOUND) {
await this.download(progress);
}
- return storage.listFiles(this.data.repoId);
+ let nbFiles = 0;
+ return storage.listFiles(this.data.repoId, "", {
+ onEntry: () => {
+ if (progress) {
+ nbFiles++;
+ progress("List file: " + nbFiles);
+ }
+ },
+ });
}
}
+
+/**
+ * Format a byte count as a human-readable size (e.g. 1536 -> "1.5KiB").
+ * @param bytes size in bytes; used as-is, no bit/byte conversion is applied
+ * @param si use powers of 1000 (kB, MB, ...) instead of 1024 (KiB, MiB, ...)
+ * @param dp number of decimals shown once the value is scaled to a unit
+ */
+function humanFileSize(bytes: number, si = false, dp = 1) {
+  const thresh = si ? 1000 : 1024;
+  if (Math.abs(bytes) < thresh) {
+    return bytes + "B";
+  }
+  const units = si
+    ? ["kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
+    : ["KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"];
+  let u = -1;
+  const r = 10 ** dp;
+  do {
+    bytes /= thresh;
+    ++u;
+  } while (
+    Math.round(Math.abs(bytes) * r) / r >= thresh &&
+    u < units.length - 1
+  );
+  return bytes.toFixed(dp) + "" + units[u];
+}
diff --git a/src/core/source/GitHubStream.ts b/src/core/source/GitHubStream.ts
index aecb5e5..e87b87b 100644
--- a/src/core/source/GitHubStream.ts
+++ b/src/core/source/GitHubStream.ts
@@ -19,7 +19,7 @@ export default class GitHubStream extends GitHubBase {
super(data);
}
- downloadFile(token: string, sha: string) {
+ downloadFile(token: string, sha: string) {
const span = trace.getTracer("ano-file").startSpan("GHStream.downloadFile");
span.setAttribute("sha", sha);
const oct = octokit(token);
@@ -128,11 +128,11 @@ export default class GitHubStream extends GitHubBase {
}
}
- async getFiles() {
+ async getFiles(progress?: (status: string) => void) {
const span = trace.getTracer("ano-file").startSpan("GHStream.getFiles");
span.setAttribute("repoId", this.data.repoId);
try {
- return this.getTree(this.data.commit);
+ return this.getTree(this.data.commit, progress);
} finally {
span.end();
}
@@ -140,6 +140,7 @@ export default class GitHubStream extends GitHubBase {
private async getTree(
sha: string,
+ progress?: (status: string) => void,
truncatedTree: Tree = {},
parentPath: string = "",
count = {
@@ -155,7 +156,6 @@ export default class GitHubStream extends GitHubBase {
count.request++;
ghRes = await this.getGHTree(sha, { recursive: true });
} catch (error) {
- console.error(error);
span.recordException(error as Error);
if ((error as any).status == 409) {
// cannot be empty otherwise it would try to download it again
@@ -176,8 +176,11 @@ export default class GitHubStream extends GitHubBase {
}
const tree = this.tree2Tree(ghRes.tree, truncatedTree, parentPath);
count.file += ghRes.tree.length;
+ if (progress) {
+ progress("List file: " + count.file);
+ }
if (ghRes.truncated) {
- await this.getTruncatedTree(sha, tree, parentPath, count);
+ await this.getTruncatedTree(sha, progress, tree, parentPath, count);
}
span.end();
return tree;
@@ -202,6 +205,7 @@ export default class GitHubStream extends GitHubBase {
private async getTruncatedTree(
sha: string,
+ progress?: (status: string) => void,
truncatedTree: Tree = {},
parentPath: string = "",
count = {
@@ -230,6 +234,9 @@ export default class GitHubStream extends GitHubBase {
}
count.file += data.tree.length;
+ if (progress) {
+ progress("List file: " + count.file);
+ }
if (data.tree.length < 100 && count.request < 200) {
const promises: Promise[] = [];
for (const file of data.tree) {
@@ -238,6 +245,7 @@ export default class GitHubStream extends GitHubBase {
promises.push(
this.getTruncatedTree(
file.sha,
+ progress,
truncatedTree,
elementPath,
count,
diff --git a/src/core/source/Zip.ts b/src/core/source/Zip.ts
index f914b98..acd965e 100644
--- a/src/core/source/Zip.ts
+++ b/src/core/source/Zip.ts
@@ -12,8 +12,16 @@ export default class Zip implements SourceBase {
this.url = data.url;
}
- async getFiles() {
- return storage.listFiles(this.repoId);
+ /**
+  * List the files stored for this repository.
+  * @param progress optional callback; receives "List file: <n>" each time
+  *   the storage layer invokes `onEntry`, with <n> counting those calls
+  * @returns whatever `storage.listFiles` returns for this repository
+  */
+ async getFiles(progress?: (status: string) => void) {
+   let listed = 0;
+   const onEntry = () => {
+     // entries are only counted when someone is listening
+     if (!progress) return;
+     listed += 1;
+     progress("List file: " + listed);
+   };
+   return storage.listFiles(this.repoId, "", { onEntry });
+ }
async getFileContent(file: AnonymizedFile): Promise {
diff --git a/src/core/storage/S3.ts b/src/core/storage/S3.ts
index c5323a2..d2537e9 100644
--- a/src/core/storage/S3.ts
+++ b/src/core/storage/S3.ts
@@ -10,7 +10,7 @@ import config from "../../config";
import { pipeline, Readable, Transform } from "stream";
import ArchiveStreamToS3 from "decompress-stream-to-s3";
import { Response } from "express";
-import { contentType } from "mime-types";
+import { lookup } from "mime-types";
import * as archiver from "archiver";
import { trace } from "@opentelemetry/api";
import { dirname, basename, join } from "path";
@@ -170,7 +170,7 @@ export default class S3Storage extends StorageBase {
lastModified: info.LastModified,
contentType: info.ContentType
? info.ContentType
- : (contentType(path) as string),
+ : (lookup(path) as string),
};
} finally {
span.end();
@@ -226,7 +226,7 @@ export default class S3Storage extends StorageBase {
Bucket: config.S3_BUCKET,
Key: join(this.repoPath(repoId), path),
Body: data,
- ContentType: contentType(path).toString(),
+ ContentType: lookup(path).toString(),
};
if (source) {
params.Tagging = `source=${source}`;
diff --git a/src/queue/processes/downloadRepository.ts b/src/queue/processes/downloadRepository.ts
index d31f7be..532c1d7 100644
--- a/src/queue/processes/downloadRepository.ts
+++ b/src/queue/processes/downloadRepository.ts
@@ -17,18 +17,43 @@ export default async function (job: SandboxedJob) {
const span = trace.getTracer("ano-file").startSpan("proc.downloadRepository");
span.setAttribute("repoId", job.data.repoId);
console.log(`[QUEUE] ${job.data.repoId} is going to be downloaded`);
+ let statusInterval: any = null;
+ await connect();
+ const repo = await getRepository(job.data.repoId);
try {
- await connect();
- const repo = await getRepository(job.data.repoId);
- job.updateProgress({ status: "get_repo" });
+ let progress: any = null;
+ // Every 500 ms, persist the latest progress message as the repository
+ // status message, until the repository reaches READY or ERROR.
+ statusInterval = setInterval(async () => {
+   try {
+     if (
+       repo.status == RepositoryStatus.READY ||
+       repo.status == RepositoryStatus.ERROR
+     ) {
+       return clearInterval(statusInterval);
+     }
+     // nothing has been reported yet: there is no message to persist
+     if (!progress) {
+       return;
+     }
+     if (repo.status && repo.model.statusMessage !== progress.status) {
+       console.log(
+         `[QUEUE] Progress: ${job.data.repoId} ${progress.status}`
+       );
+       await repo.updateStatus(repo.status, progress.status || "");
+     }
+   } catch (_) {
+     // best effort: a failed status refresh must not interrupt the download
+   }
+ }, 500);
+ /**
+  * Remember the latest progress report and forward it to the queue job.
+  * @param obj a status string, or an object carrying a `status` field
+  */
+ function updateProgress(obj: { status: string } | string) {
+   // a bare string is wrapped so both call styles share the same shape
+   progress = typeof obj === "string" ? { status: obj } : obj;
+   job.updateProgress(progress);
+ }
+ updateProgress({ status: "get_repo" });
try {
- job.updateProgress({ status: "resetSate" });
+ updateProgress({ status: "resetSate" });
await repo.resetSate(RepositoryStatus.PREPARING, "");
- job.updateProgress({ status: "download" });
- await repo.anonymize();
+ await repo.anonymize(updateProgress);
+ updateProgress({ status: RepositoryStatus.READY });
console.log(`[QUEUE] ${job.data.repoId} is downloaded`);
} catch (error) {
- job.updateProgress({ status: "error" });
+ updateProgress({ status: "error" });
if (error instanceof Error) {
span.recordException(error as Exception);
await repo.updateStatus(RepositoryStatus.ERROR, error.message);
@@ -38,11 +63,14 @@ export default async function (job: SandboxedJob) {
}
throw error;
}
- } catch (error) {
- console.error(error)
+ } catch (error: any) {
+ console.error(error);
+ job.updateProgress({ status: "error", error: error });
+ await repo.updateStatus(RepositoryStatus.ERROR, error.message);
span.recordException(error as Exception);
console.log(`[QUEUE] ${job.data.repoId} is finished with an error`);
} finally {
+ clearInterval(statusInterval);
span.end();
}
}
diff --git a/src/server/routes/repository-private.ts b/src/server/routes/repository-private.ts
index 19e5f56..fdf5edf 100644
--- a/src/server/routes/repository-private.ts
+++ b/src/server/routes/repository-private.ts
@@ -17,6 +17,7 @@ import User from "../../core/User";
import { RepositoryStatus } from "../../core/types";
import { IUserDocument } from "../../core/model/users/users.types";
import { checkToken } from "../../core/GitHubUtils";
+import config from "../../config";
const router = express.Router();
@@ -374,7 +375,42 @@ router.post(
}
updateRepoModel(repo.model, repoUpdate);
- repo.source.type = "GitHubStream";
+
+ const r = gh(repoUpdate.fullName);
+ if (!r?.owner || !r?.name) {
+ await repo.resetSate(RepositoryStatus.ERROR, "repo_not_found");
+ throw new AnonymousError("repo_not_found", {
+ object: req.body,
+ httpStatus: 404,
+ });
+ }
+ const repository = await getRepositoryFromGitHub({
+ accessToken: user.accessToken,
+ owner: r.owner,
+ repo: r.name,
+ });
+
+ if (!repository) {
+ await repo.resetSate(RepositoryStatus.ERROR, "repo_not_found");
+ throw new AnonymousError("repo_not_found", {
+ object: req.body,
+ httpStatus: 404,
+ });
+ }
+ console.log(repository);
+ if (repository.size) {
+ if (
+ repository.size > config.AUTO_DOWNLOAD_REPO_SIZE &&
+ repo.model.source.type == "GitHubDownload"
+ ) {
+ repo.model.source.type = "GitHubStream";
+ } else if (
+ repository.size < config.AUTO_DOWNLOAD_REPO_SIZE &&
+ repo.model.source.type == "GitHubStream"
+ ) {
+ repo.model.source.type = "GitHubDownload";
+ }
+ }
const removeRepoFromConference = async (conferenceID: string) => {
const conf = await ConferenceModel.findOne({
@@ -485,25 +521,27 @@ router.post("/", async (req: express.Request, res: express.Response) => {
repo.source.accessToken = user.accessToken;
repo.source.repositoryId = repository.model.id;
repo.source.repositoryName = repoUpdate.fullName;
+ console.log(repository.size);
+ if (
+ repository.size !== undefined &&
+ repository.size < config.AUTO_DOWNLOAD_REPO_SIZE
+ ) {
+ repo.source.type = "GitHubDownload";
+ }
+ if (repository.size) {
+ if (
+ repository.size > config.AUTO_DOWNLOAD_REPO_SIZE &&
+ repo.source.type == "GitHubDownload"
+ ) {
+ repo.source.type = "GitHubStream";
+ } else if (
+ repository.size < config.AUTO_DOWNLOAD_REPO_SIZE &&
+ repo.source.type == "GitHubStream"
+ ) {
+ repo.source.type = "GitHubDownload";
+ }
+ }
- // if (repo.source.type === "GitHubDownload") {
- // // details.size is in kilobytes
- // if (
- // repository.size === undefined ||
- // repository.size > config.MAX_REPO_SIZE
- // ) {
- // throw new AnonymousError("invalid_mode", {
- // object: repository,
- // httpStatus: 400,
- // });
- // }
- // }
- // if (
- // repository.size !== undefined &&
- // repository.size < config.AUTO_DOWNLOAD_REPO_SIZE
- // ) {
- // repo.source.type = "GitHubDownload";
- // }
repo.conference = repoUpdate.conference;
await repo.save();
diff --git a/src/server/routes/webview.ts b/src/server/routes/webview.ts
index 8ef2fa8..f5ae1a1 100644
--- a/src/server/routes/webview.ts
+++ b/src/server/routes/webview.ts
@@ -100,7 +100,7 @@ async function webView(req: express.Request, res: express.Response) {
if (f.extension() == "md") {
const content = await streamToString(await f.anonymizedContent());
res
- .contentType("html")
+ .contentType("text/html")
.send(marked.marked(content, { headerIds: false, mangle: false }));
} else {
f.send(res);
diff --git a/src/streamer/route.ts b/src/streamer/route.ts
index 257c8ac..537cdf4 100644
--- a/src/streamer/route.ts
+++ b/src/streamer/route.ts
@@ -2,9 +2,7 @@ import * as express from "express";
import GitHubStream from "../core/source/GitHubStream";
import { AnonymizeTransformer, isTextFile } from "../core/anonymize-utils";
import { handleError } from "../server/routes/route-utils";
-import { contentType } from "mime-types";
-import storage from "../core/storage";
-import AnonymizedFile from "../core/AnonymizedFile";
+import { lookup } from "mime-types";
export const router = express.Router();
@@ -33,7 +31,7 @@ router.post("/", async (req: express.Request, res: express.Response) => {
() => fileSha
);
try {
- const mime = contentType(filePath);
+ const mime = lookup(filePath);
if (mime && !filePath.endsWith(".ts")) {
res.contentType(mime);
} else if (isTextFile(filePath)) {