From 1d4bab7866e11cd1e0050caa028322cc162f5121 Mon Sep 17 00:00:00 2001 From: tdurieux Date: Wed, 3 Apr 2024 18:25:33 +0100 Subject: [PATCH] fix: fix webview & improve download progress --- public/partials/status.htm | 7 +-- src/core/AnonymizedFile.ts | 1 - src/core/Repository.ts | 43 +++++++++++-- src/core/source/GitHubDownload.ts | 64 ++++++++++++------- src/core/source/GitHubStream.ts | 18 ++++-- src/core/source/Zip.ts | 12 +++- src/core/storage/S3.ts | 6 +- src/queue/processes/downloadRepository.ts | 46 +++++++++++--- src/server/routes/repository-private.ts | 76 +++++++++++++++++------ src/server/routes/webview.ts | 2 +- src/streamer/route.ts | 6 +- 11 files changed, 206 insertions(+), 75 deletions(-) diff --git a/public/partials/status.htm b/public/partials/status.htm index efe57d4..e3f9efe 100644 --- a/public/partials/status.htm +++ b/public/partials/status.htm @@ -21,10 +21,9 @@ > {{repo.status | title}} - + : {{repo.statusMessage | title}} diff --git a/src/core/AnonymizedFile.ts b/src/core/AnonymizedFile.ts index 885f0fd..e45dd5a 100644 --- a/src/core/AnonymizedFile.ts +++ b/src/core/AnonymizedFile.ts @@ -11,7 +11,6 @@ import { anonymizePath, isTextFile } from "./anonymize-utils"; import AnonymousError from "./AnonymousError"; import { handleError } from "../server/routes/route-utils"; import got from "got"; -import storage from "./storage"; /** * Represent a file in a anonymized repository diff --git a/src/core/Repository.ts b/src/core/Repository.ts index f1f462b..2fd6893 100644 --- a/src/core/Repository.ts +++ b/src/core/Repository.ts @@ -16,10 +16,14 @@ import AnonymousError from "./AnonymousError"; import { downloadQueue } from "../queue"; import { isConnected } from "../server/database"; import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model"; -import { GitHubRepository } from "./source/GitHubRepository"; +import { + getRepositoryFromGitHub, + GitHubRepository, +} from "./source/GitHubRepository"; import { trace } from "@opentelemetry/api"; import { getToken } from "./GitHubUtils"; import { FILE_TYPE } from "./storage/Storage"; +import config from "../config"; function anonymizeTreeRecursive( tree: TreeElement, @@ -129,7 +133,11 @@ export default class Repository { * @param opt force to get an updated list of files * @returns The file tree */ - async files(opt: { force?: boolean } = { force: false }): Promise { + async files( + opt: { force?: boolean; progress?: (status: string) => void } = { + force: false, + } + ): Promise { const span = trace.getTracer("ano-file").startSpan("Repository.files"); span.setAttribute("repoId", this.repoId); try { @@ -147,7 +155,7 @@ export default class Repository { ) { return this._model.originalFiles; } - const files = await this.source.getFiles(); + const files = await this.source.getFiles(opt.progress); this._model.originalFiles = files; this._model.size = { storage: 0, file: 0 }; await this.computeSize(); @@ -306,6 +314,25 @@ export default class Repository { `[UPDATE] ${this._model.repoId} will be updated to ${newCommit}` ); + const repository = await getRepositoryFromGitHub({ + accessToken: await this.getToken(), + owner: this.source.data.organization, + repo: this.source.data.repoName, + }); + if (repository.size) { + if ( + repository.size > config.AUTO_DOWNLOAD_REPO_SIZE && + this.model.source.type == "GitHubDownload" + ) { + this.model.source.type = "GitHubStream"; + } else if ( + repository.size < config.AUTO_DOWNLOAD_REPO_SIZE && + this.model.source.type == "GitHubStream" + ) { + this.model.source.type = "GitHubDownload"; + } + } + await this.resetSate(RepositoryStatus.PREPARING); await downloadQueue.add(this.repoId, this, { jobId: this.repoId, @@ -320,16 +347,20 @@ export default class Repository { * * @returns void */ - async anonymize() { + async anonymize(progress?: (status: string) => void) { const span = trace.getTracer("ano-file").startSpan("Repository.anonymize"); span.setAttribute("repoId", this.repoId); if (this.status === RepositoryStatus.READY) { span.end(); return; } - await this.updateStatus(RepositoryStatus.PREPARING); - await this.files(); + await this.updateStatus(RepositoryStatus.DOWNLOAD); + await this.files({ + force: false, + progress, + }); await this.updateStatus(RepositoryStatus.READY); + await this.computeSize(); span.end(); } diff --git a/src/core/source/GitHubDownload.ts b/src/core/source/GitHubDownload.ts index e2abe64..4c39b5a 100644 --- a/src/core/source/GitHubDownload.ts +++ b/src/core/source/GitHubDownload.ts @@ -42,25 +42,16 @@ export default class GitHubDownload extends GitHubBase { }); } await storage.mk(this.data.repoId); - let downloadProgress: { transferred: number } | undefined = undefined; - let progressTimeout; - let inDownload = true; - - async function updateProgress() { - if (inDownload) { - if (progress) { - progress(downloadProgress?.transferred?.toString() || ""); - } - progressTimeout = setTimeout(updateProgress, 1500); - } - } - updateProgress(); - try { const downloadStream = got.stream(response.url); - downloadStream.addListener("downloadProgress", async (p) => { - downloadProgress = p; - }); + downloadStream.addListener( + "downloadProgress", + (p: { transferred?: number }) => { + if (progress && p.transferred) { + progress("Repository download: " + humanFileSize(p.transferred)); + } + } + ); await storage.extractZip( this.data.repoId, "", @@ -74,9 +65,6 @@ export default class GitHubDownload extends GitHubBase { cause: error as Error, object: this.data, }); - } finally { - inDownload = false; - clearTimeout(progressTimeout); } } finally { span.end(); @@ -116,6 +104,40 @@ export default class GitHubDownload extends GitHubBase { if ((await storage.exists(this.data.repoId)) === FILE_TYPE.NOT_FOUND) { await this.download(progress); } - return storage.listFiles(this.data.repoId); + let nbFiles = 0; + return storage.listFiles(this.data.repoId, "", { + onEntry: () => { + if (progress) { + nbFiles++; + progress("List file: " + nbFiles); + } + }, + }); } } + +function humanFileSize(bytes: number, si = false, dp = 1) { + const thresh = si ? 1000 : 1024; + + bytes = bytes / 8; + + if (Math.abs(bytes) < thresh) { + return bytes + "B"; + } + + const units = si + ? ["kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"] + : ["KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"]; + let u = -1; + const r = 10 ** dp; + + do { + bytes /= thresh; + ++u; + } while ( + Math.round(Math.abs(bytes) * r) / r >= thresh && + u < units.length - 1 + ); + + return bytes.toFixed(dp) + "" + units[u]; +} diff --git a/src/core/source/GitHubStream.ts b/src/core/source/GitHubStream.ts index aecb5e5..e87b87b 100644 --- a/src/core/source/GitHubStream.ts +++ b/src/core/source/GitHubStream.ts @@ -19,7 +19,7 @@ export default class GitHubStream extends GitHubBase { super(data); } - downloadFile(token: string, sha: string) { + downloadFile(token: string, sha: string) { const span = trace.getTracer("ano-file").startSpan("GHStream.downloadFile"); span.setAttribute("sha", sha); const oct = octokit(token); @@ -128,11 +128,11 @@ export default class GitHubStream extends GitHubBase { } } - async getFiles() { + async getFiles(progress?: (status: string) => void) { const span = trace.getTracer("ano-file").startSpan("GHStream.getFiles"); span.setAttribute("repoId", this.data.repoId); try { - return this.getTree(this.data.commit); + return this.getTree(this.data.commit, progress); } finally { span.end(); } @@ -140,6 +140,7 @@ export default class GitHubStream extends GitHubBase { private async getTree( sha: string, + progress?: (status: string) => void, truncatedTree: Tree = {}, parentPath: string = "", count = { @@ -155,7 +156,6 @@ export default class GitHubStream extends GitHubBase { count.request++; ghRes = await this.getGHTree(sha, { recursive: true }); } catch (error) { - console.error(error); span.recordException(error as Error); if ((error as any).status == 409) { // cannot be empty otherwise it would try to download it again @@ -176,8 +176,11 @@ export default class GitHubStream extends GitHubBase { } const tree = this.tree2Tree(ghRes.tree, truncatedTree, parentPath); count.file += ghRes.tree.length; + if (progress) { + progress("List file: " + count.file); + } if (ghRes.truncated) { - await this.getTruncatedTree(sha, tree, parentPath, count); + await this.getTruncatedTree(sha, progress, tree, parentPath, count); } span.end(); return tree; @@ -202,6 +205,7 @@ export default class GitHubStream extends GitHubBase { private async getTruncatedTree( sha: string, + progress?: (status: string) => void, truncatedTree: Tree = {}, parentPath: string = "", count = { @@ -230,6 +234,9 @@ export default class GitHubStream extends GitHubBase { } count.file += data.tree.length; + if (progress) { + progress("List file: " + count.file); + } if (data.tree.length < 100 && count.request < 200) { const promises: Promise[] = []; for (const file of data.tree) { @@ -238,6 +245,7 @@ export default class GitHubStream extends GitHubBase { promises.push( this.getTruncatedTree( file.sha, + progress, truncatedTree, elementPath, count, diff --git a/src/core/source/Zip.ts b/src/core/source/Zip.ts index f914b98..acd965e 100644 --- a/src/core/source/Zip.ts +++ b/src/core/source/Zip.ts @@ -12,8 +12,16 @@ export default class Zip implements SourceBase { this.url = data.url; } - async getFiles() { - return storage.listFiles(this.repoId); + async getFiles(progress?: (status: string) => void) { + let nbFiles = 0; + return storage.listFiles(this.repoId, "", { + onEntry: () => { + if (progress) { + nbFiles++; + progress("List file: " + nbFiles); + } + }, + }); } async getFileContent(file: AnonymizedFile): Promise { diff --git a/src/core/storage/S3.ts b/src/core/storage/S3.ts index c5323a2..d2537e9 100644 --- a/src/core/storage/S3.ts +++ b/src/core/storage/S3.ts @@ -10,7 +10,7 @@ import config from "../../config"; import { pipeline, Readable, Transform } from "stream"; import ArchiveStreamToS3 from "decompress-stream-to-s3"; import { Response } from "express"; -import { contentType } from "mime-types"; +import { lookup } from "mime-types"; import * as archiver from "archiver"; import { trace } from "@opentelemetry/api"; import { dirname, basename, join } from "path"; @@ -170,7 +170,7 @@ export default class S3Storage extends StorageBase { lastModified: info.LastModified, contentType: info.ContentType ? info.ContentType - : (contentType(path) as string), + : (lookup(path) as string), }; } finally { span.end(); @@ -226,7 +226,7 @@ export default class S3Storage extends StorageBase { Bucket: config.S3_BUCKET, Key: join(this.repoPath(repoId), path), Body: data, - ContentType: contentType(path).toString(), + ContentType: lookup(path).toString(), }; if (source) { params.Tagging = `source=${source}`; diff --git a/src/queue/processes/downloadRepository.ts b/src/queue/processes/downloadRepository.ts index d31f7be..532c1d7 100644 --- a/src/queue/processes/downloadRepository.ts +++ b/src/queue/processes/downloadRepository.ts @@ -17,18 +17,43 @@ export default async function (job: SandboxedJob) { const span = trace.getTracer("ano-file").startSpan("proc.downloadRepository"); span.setAttribute("repoId", job.data.repoId); console.log(`[QUEUE] ${job.data.repoId} is going to be downloaded`); + let statusInterval: any = null; + await connect(); + const repo = await getRepository(job.data.repoId); try { - await connect(); - const repo = await getRepository(job.data.repoId); - job.updateProgress({ status: "get_repo" }); + let progress: any = null; + statusInterval = setInterval(async () => { + try { + if ( + repo.status == RepositoryStatus.READY || + repo.status == RepositoryStatus.ERROR + ) { + return clearInterval(statusInterval); + } + if (repo.status && repo.model.statusMessage !== progress?.status) { + console.log( + `[QUEUE] Progress: ${job.data.repoId} ${progress.status}` + ); + await repo.updateStatus(repo.status, progress?.status || ""); + } + } catch (_) { + // ignore error + } + }, 500); + function updateProgress(obj: { status: string } | string) { + const o = typeof obj === "string" ? { status: obj } : obj; + progress = o; + job.updateProgress(o); + } + updateProgress({ status: "get_repo" }); try { - job.updateProgress({ status: "resetSate" }); + updateProgress({ status: "resetSate" }); await repo.resetSate(RepositoryStatus.PREPARING, ""); - job.updateProgress({ status: "download" }); - await repo.anonymize(); + await repo.anonymize(updateProgress); + updateProgress({ status: RepositoryStatus.READY }); console.log(`[QUEUE] ${job.data.repoId} is downloaded`); } catch (error) { - job.updateProgress({ status: "error" }); + updateProgress({ status: "error" }); if (error instanceof Error) { span.recordException(error as Exception); await repo.updateStatus(RepositoryStatus.ERROR, error.message); @@ -38,11 +63,14 @@ export default async function (job: SandboxedJob) { } throw error; } - } catch (error) { - console.error(error) + } catch (error: any) { + console.error(error); + job.updateProgress({ status: "error", error: error }); + await repo.updateStatus(RepositoryStatus.ERROR, error.message); span.recordException(error as Exception); console.log(`[QUEUE] ${job.data.repoId} is finished with an error`); } finally { + clearInterval(statusInterval); span.end(); } } diff --git a/src/server/routes/repository-private.ts b/src/server/routes/repository-private.ts index 19e5f56..fdf5edf 100644 --- a/src/server/routes/repository-private.ts +++ b/src/server/routes/repository-private.ts @@ -17,6 +17,7 @@ import User from "../../core/User"; import { RepositoryStatus } from "../../core/types"; import { IUserDocument } from "../../core/model/users/users.types"; import { checkToken } from "../../core/GitHubUtils"; +import config from "../../config"; const router = express.Router(); @@ -374,7 +375,42 @@ router.post( } updateRepoModel(repo.model, repoUpdate); - repo.source.type = "GitHubStream"; + + const r = gh(repoUpdate.fullName); + if (!r?.owner || !r?.name) { + await repo.resetSate(RepositoryStatus.ERROR, "repo_not_found"); + throw new AnonymousError("repo_not_found", { + object: req.body, + httpStatus: 404, + }); + } + const repository = await getRepositoryFromGitHub({ + accessToken: user.accessToken, + owner: r.owner, + repo: r.name, + }); + + if (!repository) { + await repo.resetSate(RepositoryStatus.ERROR, "repo_not_found"); + throw new AnonymousError("repo_not_found", { + object: req.body, + httpStatus: 404, + }); + } + console.log(repository); + if (repository.size) { + if ( + repository.size > config.AUTO_DOWNLOAD_REPO_SIZE && + repo.model.source.type == "GitHubDownload" + ) { + repo.model.source.type = "GitHubStream"; + } else if ( + repository.size < config.AUTO_DOWNLOAD_REPO_SIZE && + repo.model.source.type == "GitHubStream" + ) { + repo.model.source.type = "GitHubDownload"; + } + } const removeRepoFromConference = async (conferenceID: string) => { const conf = await ConferenceModel.findOne({ @@ -485,25 +521,27 @@ router.post("/", async (req: express.Request, res: express.Response) => { repo.source.accessToken = user.accessToken; repo.source.repositoryId = repository.model.id; repo.source.repositoryName = repoUpdate.fullName; + console.log(repository.size); + if ( + repository.size !== undefined && + repository.size < config.AUTO_DOWNLOAD_REPO_SIZE + ) { + repo.source.type = "GitHubDownload"; + } + if (repository.size) { + if ( + repository.size > config.AUTO_DOWNLOAD_REPO_SIZE && + repo.source.type == "GitHubDownload" + ) { + repo.source.type = "GitHubStream"; + } else if ( + repository.size < config.AUTO_DOWNLOAD_REPO_SIZE && + repo.source.type == "GitHubStream" + ) { + repo.source.type = "GitHubDownload"; + } + } - // if (repo.source.type === "GitHubDownload") { - // // details.size is in kilobytes - // if ( - // repository.size === undefined || - // repository.size > config.MAX_REPO_SIZE - // ) { - // throw new AnonymousError("invalid_mode", { - // object: repository, - // httpStatus: 400, - // }); - // } - // } - // if ( - // repository.size !== undefined && - // repository.size < config.AUTO_DOWNLOAD_REPO_SIZE - // ) { - // repo.source.type = "GitHubDownload"; - // } repo.conference = repoUpdate.conference; await repo.save(); diff --git a/src/server/routes/webview.ts b/src/server/routes/webview.ts index 8ef2fa8..f5ae1a1 100644 --- a/src/server/routes/webview.ts +++ b/src/server/routes/webview.ts @@ -100,7 +100,7 @@ async function webView(req: express.Request, res: express.Response) { if (f.extension() == "md") { const content = await streamToString(await f.anonymizedContent()); res - .contentType("html") + .contentType("text/html") .send(marked.marked(content, { headerIds: false, mangle: false })); } else { f.send(res); diff --git a/src/streamer/route.ts b/src/streamer/route.ts index 257c8ac..537cdf4 100644 --- a/src/streamer/route.ts +++ b/src/streamer/route.ts @@ -2,9 +2,7 @@ import * as express from "express"; import GitHubStream from "../core/source/GitHubStream"; import { AnonymizeTransformer, isTextFile } from "../core/anonymize-utils"; import { handleError } from "../server/routes/route-utils"; -import { contentType } from "mime-types"; -import storage from "../core/storage"; -import AnonymizedFile from "../core/AnonymizedFile"; +import { lookup } from "mime-types"; export const router = express.Router(); @@ -33,7 +31,7 @@ router.post("/", async (req: express.Request, res: express.Response) => { () => fileSha ); try { - const mime = contentType(filePath); + const mime = lookup(filePath); if (mime && !filePath.endsWith(".ts")) { res.contentType(mime); } else if (isTextFile(filePath)) {