From db67f53b2c8a14cfdc9d6a6e178b6ce56e036345 Mon Sep 17 00:00:00 2001 From: tdurieux Date: Wed, 3 Apr 2024 13:24:34 +0100 Subject: [PATCH] fix: fix GitHubDownload --- src/core/AnonymizedFile.ts | 1 + src/core/Repository.ts | 18 ++++++ src/core/source/GitHubDownload.ts | 4 +- src/core/source/GitHubStream.ts | 97 +++++++++++++++++++------------ src/core/storage/FileSystem.ts | 35 +++++------ src/server/routes/file.ts | 6 ++ src/streamer/route.ts | 8 ++- 7 files changed, 112 insertions(+), 57 deletions(-) diff --git a/src/core/AnonymizedFile.ts b/src/core/AnonymizedFile.ts index e45dd5a..885f0fd 100644 --- a/src/core/AnonymizedFile.ts +++ b/src/core/AnonymizedFile.ts @@ -11,6 +11,7 @@ import { anonymizePath, isTextFile } from "./anonymize-utils"; import AnonymousError from "./AnonymousError"; import { handleError } from "../server/routes/route-utils"; import got from "got"; +import storage from "./storage"; /** * Represent a file in a anonymized repository diff --git a/src/core/Repository.ts b/src/core/Repository.ts index e91e762..f1f462b 100644 --- a/src/core/Repository.ts +++ b/src/core/Repository.ts @@ -19,6 +19,7 @@ import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymized import { GitHubRepository } from "./source/GitHubRepository"; import { trace } from "@opentelemetry/api"; import { getToken } from "./GitHubUtils"; +import { FILE_TYPE } from "./storage/Storage"; function anonymizeTreeRecursive( tree: TreeElement, @@ -219,6 +220,23 @@ export default class Repository { }); } + async isReady() { + if (this.status !== RepositoryStatus.READY) return false; + if ( + this.source.type == "GitHubDownload" && + (await storage.exists(this.repoId)) == FILE_TYPE.NOT_FOUND + ) { + await this.resetSate(RepositoryStatus.PREPARING); + + await downloadQueue.add(this.repoId, this, { + jobId: this.repoId, + attempts: 3, + }); + return false; + } + return true; + } + /** * Update the repository if a new commit exists * diff --git 
a/src/core/source/GitHubDownload.ts b/src/core/source/GitHubDownload.ts index 4465b71..e2abe64 100644 --- a/src/core/source/GitHubDownload.ts +++ b/src/core/source/GitHubDownload.ts @@ -90,9 +90,9 @@ export default class GitHubDownload extends GitHubBase { const span = trace .getTracer("ano-file") .startSpan("GHDownload.getFileContent"); - span.setAttribute("repoId", file.repository.repoId); + span.setAttribute("repoId", this.data.repoId); try { - const exists = await storage.exists(file.filePath); + const exists = await storage.exists(this.data.repoId, file.filePath); if (exists === FILE_TYPE.FILE) { return storage.read(this.data.repoId, file.filePath); } else if (exists === FILE_TYPE.FOLDER) { diff --git a/src/core/source/GitHubStream.ts b/src/core/source/GitHubStream.ts index 370dece..aecb5e5 100644 --- a/src/core/source/GitHubStream.ts +++ b/src/core/source/GitHubStream.ts @@ -19,7 +19,7 @@ export default class GitHubStream extends GitHubBase { super(data); } - downloadFile(token: string, sha: string) { + downloadFile(token: string, sha: string) { const span = trace.getTracer("ano-file").startSpan("GHStream.downloadFile"); span.setAttribute("sha", sha); const oct = octokit(token); @@ -49,6 +49,53 @@ export default class GitHubStream extends GitHubBase { } } + async getFileContentCache( + filePath: string, + repoId: string, + fileSha: () => Promise | string + ) { + const span = trace + .getTracer("ano-file") + .startSpan("GHStream.getFileContent"); + span.setAttribute("repoId", repoId); + span.setAttribute("file", filePath); + + const fileInfo = await storage.exists(repoId, filePath); + if (fileInfo == FILE_TYPE.FILE) { + return storage.read(repoId, filePath); + } else if (fileInfo == FILE_TYPE.FOLDER) { + throw new AnonymousError("folder_not_supported", { + httpStatus: 400, + object: filePath, + }); + } + const content = this.downloadFile( + await this.data.getToken(), + await fileSha() + ); + + content.on("close", () => { + span.end(); + }); + + // duplicate 
the stream to write it to the storage + const stream1 = content.pipe(new stream.PassThrough()); + const stream2 = content.pipe(new stream.PassThrough()); + + content.on("error", (error) => { + error = new AnonymousError("file_not_found", { + httpStatus: (error as any).status || (error as any).httpStatus, + cause: error as Error, + object: filePath, + }); + stream1.emit("error", error); + stream2.emit("error", error); + }); + + storage.write(repoId, filePath, stream1, this.type); + return stream2; + } + async getFileContent(file: AnonymizedFile): Promise { const span = trace .getTracer("ano-file") @@ -62,44 +109,20 @@ export default class GitHubStream extends GitHubBase { // compute the original path if ambiguous await file.originalPath(); } - const fileInfo = await storage.exists( + return this.getFileContentCache( + file.filePath, file.repository.repoId, - file.filePath + async () => { + const fileSha = await file.sha(); + if (!fileSha) { + throw new AnonymousError("file_not_accessible", { + httpStatus: 404, + object: file, + }); + } + return fileSha; + } ); - if (fileInfo == FILE_TYPE.FILE) { - return storage.read(file.repository.repoId, file.filePath); - } else if (fileInfo == FILE_TYPE.FOLDER) { - throw new AnonymousError("folder_not_supported", { - httpStatus: 400, - object: file, - }); - } - span.setAttribute("path", file.filePath); - const file_sha = await file.sha(); - if (!file_sha) { - throw new AnonymousError("file_not_accessible", { - httpStatus: 404, - object: file, - }); - } - const content = this.downloadFile(await this.data.getToken(), file_sha); - - // duplicate the stream to write it to the storage - const stream1 = content.pipe(new stream.PassThrough()); - const stream2 = content.pipe(new stream.PassThrough()); - - content.on("error", (error) => { - error = new AnonymousError("file_not_found", { - httpStatus: (error as any).status || (error as any).httpStatus, - cause: error as Error, - object: file, - }); - stream1.emit("error", error); - 
stream2.emit("error", error); - }); - - storage.write(file.repository.repoId, file.filePath, stream1, this.type); - return stream2; } finally { span.end(); } diff --git a/src/core/storage/FileSystem.ts b/src/core/storage/FileSystem.ts index 03171e7..a29ca41 100644 --- a/src/core/storage/FileSystem.ts +++ b/src/core/storage/FileSystem.ts @@ -1,7 +1,7 @@ import { Tree } from "../types"; import config from "../../config"; import * as fs from "fs"; -import { Extract } from "unzip-stream"; +import { Extract } from "unzip-stream"; import { join, basename, dirname } from "path"; import { Response } from "express"; import { Readable, pipeline, Transform } from "stream"; @@ -147,18 +147,18 @@ export default class FileSystem extends StorageBase { let files = await fs.promises.readdir(fullPath); const output: Tree = {}; for (let file of files) { - let filePath = join(dir, file); + let filePath = join(fullPath, file); try { - const stats = await fs.promises.stat(join(fullPath, filePath)); + const stats = await fs.promises.stat(filePath); if (file[0] == "$") { file = "\\" + file; } if (stats.isDirectory()) { - output[file] = await this.listFiles(repoId, filePath, opt); + output[file] = await this.listFiles(repoId, join(dir, file), opt); } else if (stats.isFile()) { if (opt.onEntry) { opt.onEntry({ - path: filePath, + path: join(dir, file), size: stats.size, }); } @@ -177,18 +177,19 @@ export default class FileSystem extends StorageBase { async extractZip(repoId: string, p: string, data: Readable): Promise { const pipe = promisify(pipeline); const fullPath = join(config.FOLDER, this.repoPath(repoId), p); - return pipe( - data, - Extract({ - path: fullPath, - decodeString: (buf) => { - const name = buf.toString(); - const newName = name.substr(name.indexOf("/") + 1); - if (newName == "") return "/dev/null"; - return newName; - }, - }) - ); + const extractor = Extract({ + path: fullPath, + decodeString: (buf) => { + const name = buf.toString(); + const newName =
name.substr(name.indexOf("/") + 1); + if (newName == "") { + return "___IGNORE___"; + } + return newName; + }, + }); + await pipe(data, extractor); + await this.rm(repoId, join(p, "___IGNORE___")); } /** @override */ diff --git a/src/server/routes/file.ts b/src/server/routes/file.ts index 5602c0e..88bf1cf 100644 --- a/src/server/routes/file.ts +++ b/src/server/routes/file.ts @@ -31,6 +31,12 @@ router.get( if (!repo) return; try { + if (!(await repo.isReady())) { + throw new AnonymousError("repository_not_ready", { + object: this, + httpStatus: 503, + }); + } const f = new AnonymizedFile({ repository: repo, anonymizedPath, diff --git a/src/streamer/route.ts b/src/streamer/route.ts index 9e1f575..257c8ac 100644 --- a/src/streamer/route.ts +++ b/src/streamer/route.ts @@ -3,6 +3,8 @@ import GitHubStream from "../core/source/GitHubStream"; import { AnonymizeTransformer, isTextFile } from "../core/anonymize-utils"; import { handleError } from "../server/routes/route-utils"; import { contentType } from "mime-types"; +import storage from "../core/storage"; +import AnonymizedFile from "../core/AnonymizedFile"; export const router = express.Router(); @@ -25,7 +27,11 @@ router.post("/", async (req: express.Request, res: express.Response) => { commit: commit, getToken: () => token, }); - const content = source.downloadFile(token, fileSha); + const content = await source.getFileContentCache( + filePath, + repoId, + () => fileSha + ); try { const mime = contentType(filePath); if (mime && !filePath.endsWith(".ts")) {