fix: fix webview & improve download progress

This commit is contained in:
tdurieux
2024-04-03 18:25:33 +01:00
parent 83c55fdfbf
commit 1d4bab7866
11 changed files with 206 additions and 75 deletions

View File

@@ -21,10 +21,9 @@
>
<span>
{{repo.status | title}}
<span
ng-if="repo.status == 'download' && repo.statusMessage"
ng-bind="repo.statusMessage | humanFileSize"
></span>
<span ng-if="repo.statusMessage"
>: {{repo.statusMessage | title}}</span
>
</span>
</div>
</div>

View File

@@ -11,7 +11,6 @@ import { anonymizePath, isTextFile } from "./anonymize-utils";
import AnonymousError from "./AnonymousError";
import { handleError } from "../server/routes/route-utils";
import got from "got";
import storage from "./storage";
/**
* Represents a file in an anonymized repository

View File

@@ -16,10 +16,14 @@ import AnonymousError from "./AnonymousError";
import { downloadQueue } from "../queue";
import { isConnected } from "../server/database";
import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model";
import { GitHubRepository } from "./source/GitHubRepository";
import {
getRepositoryFromGitHub,
GitHubRepository,
} from "./source/GitHubRepository";
import { trace } from "@opentelemetry/api";
import { getToken } from "./GitHubUtils";
import { FILE_TYPE } from "./storage/Storage";
import config from "../config";
function anonymizeTreeRecursive(
tree: TreeElement,
@@ -129,7 +133,11 @@ export default class Repository {
* @param opt force to get an updated list of files
* @returns The file tree
*/
async files(opt: { force?: boolean } = { force: false }): Promise<Tree> {
async files(
opt: { force?: boolean; progress?: (status: string) => void } = {
force: false,
}
): Promise<Tree> {
const span = trace.getTracer("ano-file").startSpan("Repository.files");
span.setAttribute("repoId", this.repoId);
try {
@@ -147,7 +155,7 @@ export default class Repository {
) {
return this._model.originalFiles;
}
const files = await this.source.getFiles();
const files = await this.source.getFiles(opt.progress);
this._model.originalFiles = files;
this._model.size = { storage: 0, file: 0 };
await this.computeSize();
@@ -306,6 +314,25 @@ export default class Repository {
`[UPDATE] ${this._model.repoId} will be updated to ${newCommit}`
);
const repository = await getRepositoryFromGitHub({
accessToken: await this.getToken(),
owner: this.source.data.organization,
repo: this.source.data.repoName,
});
if (repository.size) {
if (
repository.size > config.AUTO_DOWNLOAD_REPO_SIZE &&
this.model.source.type == "GitHubDownload"
) {
this.model.source.type = "GitHubStream";
} else if (
repository.size < config.AUTO_DOWNLOAD_REPO_SIZE &&
this.model.source.type == "GitHubStream"
) {
this.model.source.type = "GitHubDownload";
}
}
await this.resetSate(RepositoryStatus.PREPARING);
await downloadQueue.add(this.repoId, this, {
jobId: this.repoId,
@@ -320,16 +347,20 @@ export default class Repository {
*
* @returns void
*/
async anonymize() {
async anonymize(progress?: (status: string) => void) {
const span = trace.getTracer("ano-file").startSpan("Repository.anonymize");
span.setAttribute("repoId", this.repoId);
if (this.status === RepositoryStatus.READY) {
span.end();
return;
}
await this.updateStatus(RepositoryStatus.PREPARING);
await this.files();
await this.updateStatus(RepositoryStatus.DOWNLOAD);
await this.files({
force: false,
progress,
});
await this.updateStatus(RepositoryStatus.READY);
await this.computeSize();
span.end();
}

View File

@@ -42,25 +42,16 @@ export default class GitHubDownload extends GitHubBase {
});
}
await storage.mk(this.data.repoId);
let downloadProgress: { transferred: number } | undefined = undefined;
let progressTimeout;
let inDownload = true;
async function updateProgress() {
if (inDownload) {
if (progress) {
progress(downloadProgress?.transferred?.toString() || "");
}
progressTimeout = setTimeout(updateProgress, 1500);
}
}
updateProgress();
try {
const downloadStream = got.stream(response.url);
downloadStream.addListener("downloadProgress", async (p) => {
downloadProgress = p;
});
downloadStream.addListener(
"downloadProgress",
(p: { transferred?: number }) => {
if (progress && p.transferred) {
progress("Repository download: " + humanFileSize(p.transferred));
}
}
);
await storage.extractZip(
this.data.repoId,
"",
@@ -74,9 +65,6 @@ export default class GitHubDownload extends GitHubBase {
cause: error as Error,
object: this.data,
});
} finally {
inDownload = false;
clearTimeout(progressTimeout);
}
} finally {
span.end();
@@ -116,6 +104,40 @@ export default class GitHubDownload extends GitHubBase {
if ((await storage.exists(this.data.repoId)) === FILE_TYPE.NOT_FOUND) {
await this.download(progress);
}
return storage.listFiles(this.data.repoId);
let nbFiles = 0;
return storage.listFiles(this.data.repoId, "", {
onEntry: () => {
if (progress) {
nbFiles++;
progress("List file: " + nbFiles);
}
},
});
}
}
/**
 * Format a byte count as a short human-readable size string.
 *
 * @param bytes Size in bytes. The value is used as-is: callers such as the
 *   download progress listener already report bytes, so no bit/byte
 *   conversion is applied here.
 * @param si When true, use powers of 1000 with SI units (kB, MB, ...);
 *   otherwise use powers of 1024 with binary units (KiB, MiB, ...).
 * @param dp Number of decimal places shown once a unit larger than "B" is used.
 * @returns The formatted size, e.g. "512B", "1.5KiB" or "1.0MB".
 */
function humanFileSize(bytes: number, si = false, dp = 1): string {
  const thresh = si ? 1000 : 1024;

  // Below one unit step the raw byte count is printed without decimals.
  if (Math.abs(bytes) < thresh) {
    return bytes + "B";
  }

  const units = si
    ? ["kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
    : ["KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"];
  let u = -1;
  const r = 10 ** dp;

  // Step up one unit at a time. The comparison is made on the value rounded
  // to `dp` decimals so that e.g. 1023.99 KiB is promoted to "1.0MiB"
  // instead of being printed as "1024.0KiB".
  do {
    bytes /= thresh;
    ++u;
  } while (
    Math.round(Math.abs(bytes) * r) / r >= thresh &&
    u < units.length - 1
  );

  return bytes.toFixed(dp) + "" + units[u];
}

View File

@@ -19,7 +19,7 @@ export default class GitHubStream extends GitHubBase {
super(data);
}
downloadFile(token: string, sha: string) {
downloadFile(token: string, sha: string) {
const span = trace.getTracer("ano-file").startSpan("GHStream.downloadFile");
span.setAttribute("sha", sha);
const oct = octokit(token);
@@ -128,11 +128,11 @@ export default class GitHubStream extends GitHubBase {
}
}
async getFiles() {
async getFiles(progress?: (status: string) => void) {
const span = trace.getTracer("ano-file").startSpan("GHStream.getFiles");
span.setAttribute("repoId", this.data.repoId);
try {
return this.getTree(this.data.commit);
return this.getTree(this.data.commit, progress);
} finally {
span.end();
}
@@ -140,6 +140,7 @@ export default class GitHubStream extends GitHubBase {
private async getTree(
sha: string,
progress?: (status: string) => void,
truncatedTree: Tree = {},
parentPath: string = "",
count = {
@@ -155,7 +156,6 @@ export default class GitHubStream extends GitHubBase {
count.request++;
ghRes = await this.getGHTree(sha, { recursive: true });
} catch (error) {
console.error(error);
span.recordException(error as Error);
if ((error as any).status == 409) {
// cannot be empty otherwise it would try to download it again
@@ -176,8 +176,11 @@ export default class GitHubStream extends GitHubBase {
}
const tree = this.tree2Tree(ghRes.tree, truncatedTree, parentPath);
count.file += ghRes.tree.length;
if (progress) {
progress("List file: " + count.file);
}
if (ghRes.truncated) {
await this.getTruncatedTree(sha, tree, parentPath, count);
await this.getTruncatedTree(sha, progress, tree, parentPath, count);
}
span.end();
return tree;
@@ -202,6 +205,7 @@ export default class GitHubStream extends GitHubBase {
private async getTruncatedTree(
sha: string,
progress?: (status: string) => void,
truncatedTree: Tree = {},
parentPath: string = "",
count = {
@@ -230,6 +234,9 @@ export default class GitHubStream extends GitHubBase {
}
count.file += data.tree.length;
if (progress) {
progress("List file: " + count.file);
}
if (data.tree.length < 100 && count.request < 200) {
const promises: Promise<any>[] = [];
for (const file of data.tree) {
@@ -238,6 +245,7 @@ export default class GitHubStream extends GitHubBase {
promises.push(
this.getTruncatedTree(
file.sha,
progress,
truncatedTree,
elementPath,
count,

View File

@@ -12,8 +12,16 @@ export default class Zip implements SourceBase {
this.url = data.url;
}
async getFiles() {
return storage.listFiles(this.repoId);
async getFiles(progress?: (status: string) => void) {
let nbFiles = 0;
return storage.listFiles(this.repoId, "", {
onEntry: () => {
if (progress) {
nbFiles++;
progress("List file: " + nbFiles);
}
},
});
}
async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {

View File

@@ -10,7 +10,7 @@ import config from "../../config";
import { pipeline, Readable, Transform } from "stream";
import ArchiveStreamToS3 from "decompress-stream-to-s3";
import { Response } from "express";
import { contentType } from "mime-types";
import { lookup } from "mime-types";
import * as archiver from "archiver";
import { trace } from "@opentelemetry/api";
import { dirname, basename, join } from "path";
@@ -170,7 +170,7 @@ export default class S3Storage extends StorageBase {
lastModified: info.LastModified,
contentType: info.ContentType
? info.ContentType
: (contentType(path) as string),
: (lookup(path) as string),
};
} finally {
span.end();
@@ -226,7 +226,7 @@ export default class S3Storage extends StorageBase {
Bucket: config.S3_BUCKET,
Key: join(this.repoPath(repoId), path),
Body: data,
ContentType: contentType(path).toString(),
ContentType: lookup(path).toString(),
};
if (source) {
params.Tagging = `source=${source}`;

View File

@@ -17,18 +17,43 @@ export default async function (job: SandboxedJob<Repository, void>) {
const span = trace.getTracer("ano-file").startSpan("proc.downloadRepository");
span.setAttribute("repoId", job.data.repoId);
console.log(`[QUEUE] ${job.data.repoId} is going to be downloaded`);
let statusInterval: any = null;
await connect();
const repo = await getRepository(job.data.repoId);
try {
await connect();
const repo = await getRepository(job.data.repoId);
job.updateProgress({ status: "get_repo" });
let progress: any = null;
statusInterval = setInterval(async () => {
try {
if (
repo.status == RepositoryStatus.READY ||
repo.status == RepositoryStatus.ERROR
) {
return clearInterval(statusInterval);
}
if (repo.status && repo.model.statusMessage !== progress?.status) {
console.log(
`[QUEUE] Progress: ${job.data.repoId} ${progress.status}`
);
await repo.updateStatus(repo.status, progress?.status || "");
}
} catch (_) {
// ignore error
}
}, 500);
function updateProgress(obj: { status: string } | string) {
const o = typeof obj === "string" ? { status: obj } : obj;
progress = o;
job.updateProgress(o);
}
updateProgress({ status: "get_repo" });
try {
job.updateProgress({ status: "resetSate" });
updateProgress({ status: "resetSate" });
await repo.resetSate(RepositoryStatus.PREPARING, "");
job.updateProgress({ status: "download" });
await repo.anonymize();
await repo.anonymize(updateProgress);
updateProgress({ status: RepositoryStatus.READY });
console.log(`[QUEUE] ${job.data.repoId} is downloaded`);
} catch (error) {
job.updateProgress({ status: "error" });
updateProgress({ status: "error" });
if (error instanceof Error) {
span.recordException(error as Exception);
await repo.updateStatus(RepositoryStatus.ERROR, error.message);
@@ -38,11 +63,14 @@ export default async function (job: SandboxedJob<Repository, void>) {
}
throw error;
}
} catch (error) {
console.error(error)
} catch (error: any) {
console.error(error);
job.updateProgress({ status: "error", error: error });
await repo.updateStatus(RepositoryStatus.ERROR, error.message);
span.recordException(error as Exception);
console.log(`[QUEUE] ${job.data.repoId} is finished with an error`);
} finally {
clearInterval(statusInterval);
span.end();
}
}

View File

@@ -17,6 +17,7 @@ import User from "../../core/User";
import { RepositoryStatus } from "../../core/types";
import { IUserDocument } from "../../core/model/users/users.types";
import { checkToken } from "../../core/GitHubUtils";
import config from "../../config";
const router = express.Router();
@@ -374,7 +375,42 @@ router.post(
}
updateRepoModel(repo.model, repoUpdate);
repo.source.type = "GitHubStream";
const r = gh(repoUpdate.fullName);
if (!r?.owner || !r?.name) {
await repo.resetSate(RepositoryStatus.ERROR, "repo_not_found");
throw new AnonymousError("repo_not_found", {
object: req.body,
httpStatus: 404,
});
}
const repository = await getRepositoryFromGitHub({
accessToken: user.accessToken,
owner: r.owner,
repo: r.name,
});
if (!repository) {
await repo.resetSate(RepositoryStatus.ERROR, "repo_not_found");
throw new AnonymousError("repo_not_found", {
object: req.body,
httpStatus: 404,
});
}
console.log(repository);
if (repository.size) {
if (
repository.size > config.AUTO_DOWNLOAD_REPO_SIZE &&
repo.model.source.type == "GitHubDownload"
) {
repo.model.source.type = "GitHubStream";
} else if (
repository.size < config.AUTO_DOWNLOAD_REPO_SIZE &&
repo.model.source.type == "GitHubStream"
) {
repo.model.source.type = "GitHubDownload";
}
}
const removeRepoFromConference = async (conferenceID: string) => {
const conf = await ConferenceModel.findOne({
@@ -485,25 +521,27 @@ router.post("/", async (req: express.Request, res: express.Response) => {
repo.source.accessToken = user.accessToken;
repo.source.repositoryId = repository.model.id;
repo.source.repositoryName = repoUpdate.fullName;
console.log(repository.size);
if (
repository.size !== undefined &&
repository.size < config.AUTO_DOWNLOAD_REPO_SIZE
) {
repo.source.type = "GitHubDownload";
}
if (repository.size) {
if (
repository.size > config.AUTO_DOWNLOAD_REPO_SIZE &&
repo.source.type == "GitHubDownload"
) {
repo.source.type = "GitHubStream";
} else if (
repository.size < config.AUTO_DOWNLOAD_REPO_SIZE &&
repo.source.type == "GitHubStream"
) {
repo.source.type = "GitHubDownload";
}
}
// if (repo.source.type === "GitHubDownload") {
// // details.size is in kilobytes
// if (
// repository.size === undefined ||
// repository.size > config.MAX_REPO_SIZE
// ) {
// throw new AnonymousError("invalid_mode", {
// object: repository,
// httpStatus: 400,
// });
// }
// }
// if (
// repository.size !== undefined &&
// repository.size < config.AUTO_DOWNLOAD_REPO_SIZE
// ) {
// repo.source.type = "GitHubDownload";
// }
repo.conference = repoUpdate.conference;
await repo.save();

View File

@@ -100,7 +100,7 @@ async function webView(req: express.Request, res: express.Response) {
if (f.extension() == "md") {
const content = await streamToString(await f.anonymizedContent());
res
.contentType("html")
.contentType("text/html")
.send(marked.marked(content, { headerIds: false, mangle: false }));
} else {
f.send(res);

View File

@@ -2,9 +2,7 @@ import * as express from "express";
import GitHubStream from "../core/source/GitHubStream";
import { AnonymizeTransformer, isTextFile } from "../core/anonymize-utils";
import { handleError } from "../server/routes/route-utils";
import { contentType } from "mime-types";
import storage from "../core/storage";
import AnonymizedFile from "../core/AnonymizedFile";
import { lookup } from "mime-types";
export const router = express.Router();
@@ -33,7 +31,7 @@ router.post("/", async (req: express.Request, res: express.Response) => {
() => fileSha
);
try {
const mime = contentType(filePath);
const mime = lookup(filePath);
if (mime && !filePath.endsWith(".ts")) {
res.contentType(mime);
} else if (isTextFile(filePath)) {