fix: fix GitHubDownload

This commit is contained in:
tdurieux
2024-04-03 13:24:34 +01:00
parent fc469be61b
commit db67f53b2c
7 changed files with 112 additions and 57 deletions
+1
View File
@@ -11,6 +11,7 @@ import { anonymizePath, isTextFile } from "./anonymize-utils";
import AnonymousError from "./AnonymousError"; import AnonymousError from "./AnonymousError";
import { handleError } from "../server/routes/route-utils"; import { handleError } from "../server/routes/route-utils";
import got from "got"; import got from "got";
import storage from "./storage";
/** /**
* Represent a file in a anonymized repository * Represent a file in a anonymized repository
+18
View File
@@ -19,6 +19,7 @@ import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymized
import { GitHubRepository } from "./source/GitHubRepository"; import { GitHubRepository } from "./source/GitHubRepository";
import { trace } from "@opentelemetry/api"; import { trace } from "@opentelemetry/api";
import { getToken } from "./GitHubUtils"; import { getToken } from "./GitHubUtils";
import { FILE_TYPE } from "./storage/Storage";
function anonymizeTreeRecursive( function anonymizeTreeRecursive(
tree: TreeElement, tree: TreeElement,
@@ -219,6 +220,23 @@ export default class Repository {
}); });
} }
/**
 * Tell whether the repository can currently be served.
 *
 * Returns `false` when the status is not `RepositoryStatus.READY`.
 * For a "GitHubDownload" source it additionally asks the storage whether
 * `this.repoId` exists; when the storage answers `FILE_TYPE.NOT_FOUND` the
 * repository state is reset to `PREPARING`, a job is added to the download
 * queue and `false` is returned.
 *
 * @returns `true` when the repository is ready, `false` otherwise
 */
async isReady() {
  if (this.status !== RepositoryStatus.READY) {
    return false;
  }
  // Only "GitHubDownload" sources are checked against the storage.
  if (this.source.type != "GitHubDownload") {
    return true;
  }
  const stored = await storage.exists(this.repoId);
  if (stored != FILE_TYPE.NOT_FOUND) {
    return true;
  }
  // Nothing stored for this repository: flag it as preparing and queue a
  // download job keyed by the repository id.
  await this.resetSate(RepositoryStatus.PREPARING);
  await downloadQueue.add(this.repoId, this, {
    jobId: this.repoId,
    attempts: 3,
  });
  return false;
}
/** /**
* Update the repository if a new commit exists * Update the repository if a new commit exists
* *
+2 -2
View File
@@ -90,9 +90,9 @@ export default class GitHubDownload extends GitHubBase {
const span = trace const span = trace
.getTracer("ano-file") .getTracer("ano-file")
.startSpan("GHDownload.getFileContent"); .startSpan("GHDownload.getFileContent");
span.setAttribute("repoId", file.repository.repoId); span.setAttribute("repoId", this.data.repoId);
try { try {
const exists = await storage.exists(file.filePath); const exists = await storage.exists(this.data.repoId, file.filePath);
if (exists === FILE_TYPE.FILE) { if (exists === FILE_TYPE.FILE) {
return storage.read(this.data.repoId, file.filePath); return storage.read(this.data.repoId, file.filePath);
} else if (exists === FILE_TYPE.FOLDER) { } else if (exists === FILE_TYPE.FOLDER) {
+60 -37
View File
@@ -19,7 +19,7 @@ export default class GitHubStream extends GitHubBase {
super(data); super(data);
} }
downloadFile(token: string, sha: string) { downloadFile(token: string, sha: string) {
const span = trace.getTracer("ano-file").startSpan("GHStream.downloadFile"); const span = trace.getTracer("ano-file").startSpan("GHStream.downloadFile");
span.setAttribute("sha", sha); span.setAttribute("sha", sha);
const oct = octokit(token); const oct = octokit(token);
@@ -49,6 +49,53 @@ export default class GitHubStream extends GitHubBase {
} }
} }
/**
 * Return the content of a file, reading it from the storage when it is
 * already there and otherwise downloading it while writing a copy to the
 * storage.
 *
 * @param filePath path of the file, used as the storage key inside `repoId`
 * @param repoId id of the repository the file is stored under
 * @param fileSha callback producing the sha handed to `downloadFile`; it is
 *   only invoked when the file is not found in the storage
 * @returns the result of `storage.read` when the file is stored, otherwise a
 *   pass-through stream fed by the download
 * @throws AnonymousError `folder_not_supported` (HTTP 400) when the storage
 *   reports that `filePath` is a folder
 */
async getFileContentCache(
  filePath: string,
  repoId: string,
  fileSha: () => Promise<string> | string
) {
  const span = trace
    .getTracer("ano-file")
    .startSpan("GHStream.getFileContent");
  span.setAttribute("repoId", repoId);
  span.setAttribute("file", filePath);
  // Becomes true once the download stream's "close" listener is in charge of
  // ending the span; every other exit path ends it in the `finally` below.
  let spanEndedByStream = false;
  try {
    const fileInfo = await storage.exists(repoId, filePath);
    if (fileInfo === FILE_TYPE.FILE) {
      // awaited inside the try so the span is ended after the read resolves
      return await storage.read(repoId, filePath);
    }
    if (fileInfo === FILE_TYPE.FOLDER) {
      throw new AnonymousError("folder_not_supported", {
        httpStatus: 400,
        object: filePath,
      });
    }
    const content = this.downloadFile(
      await this.data.getToken(),
      await fileSha()
    );
    content.on("close", () => {
      span.end();
    });
    spanEndedByStream = true;

    // duplicate the stream to write it to the storage
    const stream1 = content.pipe(new stream.PassThrough());
    const stream2 = content.pipe(new stream.PassThrough());

    content.on("error", (error) => {
      // pipe() does not forward errors, so re-emit a wrapped error on both
      // copies of the stream
      const wrapped = new AnonymousError("file_not_found", {
        httpStatus: (error as any).status || (error as any).httpStatus,
        cause: error as Error,
        object: filePath,
      });
      stream1.emit("error", wrapped);
      stream2.emit("error", wrapped);
    });

    // NOTE(review): the result of write() is not awaited; confirm failures
    // are handled inside the storage implementation.
    storage.write(repoId, filePath, stream1, this.type);
    return stream2;
  } finally {
    if (!spanEndedByStream) {
      span.end();
    }
  }
}
async getFileContent(file: AnonymizedFile): Promise<stream.Readable> { async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {
const span = trace const span = trace
.getTracer("ano-file") .getTracer("ano-file")
@@ -62,44 +109,20 @@ export default class GitHubStream extends GitHubBase {
// compute the original path if ambiguous // compute the original path if ambiguous
await file.originalPath(); await file.originalPath();
} }
const fileInfo = await storage.exists( return this.getFileContentCache(
file.filePath,
file.repository.repoId, file.repository.repoId,
file.filePath async () => {
const fileSha = await file.sha();
if (!fileSha) {
throw new AnonymousError("file_not_accessible", {
httpStatus: 404,
object: file,
});
}
return fileSha;
}
); );
if (fileInfo == FILE_TYPE.FILE) {
return storage.read(file.repository.repoId, file.filePath);
} else if (fileInfo == FILE_TYPE.FOLDER) {
throw new AnonymousError("folder_not_supported", {
httpStatus: 400,
object: file,
});
}
span.setAttribute("path", file.filePath);
const file_sha = await file.sha();
if (!file_sha) {
throw new AnonymousError("file_not_accessible", {
httpStatus: 404,
object: file,
});
}
const content = this.downloadFile(await this.data.getToken(), file_sha);
// duplicate the stream to write it to the storage
const stream1 = content.pipe(new stream.PassThrough());
const stream2 = content.pipe(new stream.PassThrough());
content.on("error", (error) => {
error = new AnonymousError("file_not_found", {
httpStatus: (error as any).status || (error as any).httpStatus,
cause: error as Error,
object: file,
});
stream1.emit("error", error);
stream2.emit("error", error);
});
storage.write(file.repository.repoId, file.filePath, stream1, this.type);
return stream2;
} finally { } finally {
span.end(); span.end();
} }
+18 -17
View File
@@ -1,7 +1,7 @@
import { Tree } from "../types"; import { Tree } from "../types";
import config from "../../config"; import config from "../../config";
import * as fs from "fs"; import * as fs from "fs";
import { Extract } from "unzip-stream"; import { Extractq } from "unzip-stream";
import { join, basename, dirname } from "path"; import { join, basename, dirname } from "path";
import { Response } from "express"; import { Response } from "express";
import { Readable, pipeline, Transform } from "stream"; import { Readable, pipeline, Transform } from "stream";
@@ -147,18 +147,18 @@ export default class FileSystem extends StorageBase {
let files = await fs.promises.readdir(fullPath); let files = await fs.promises.readdir(fullPath);
const output: Tree = {}; const output: Tree = {};
for (let file of files) { for (let file of files) {
let filePath = join(dir, file); let filePath = join(fullPath, file);
try { try {
const stats = await fs.promises.stat(join(fullPath, filePath)); const stats = await fs.promises.stat(filePath);
if (file[0] == "$") { if (file[0] == "$") {
file = "\\" + file; file = "\\" + file;
} }
if (stats.isDirectory()) { if (stats.isDirectory()) {
output[file] = await this.listFiles(repoId, filePath, opt); output[file] = await this.listFiles(repoId, join(dir, file), opt);
} else if (stats.isFile()) { } else if (stats.isFile()) {
if (opt.onEntry) { if (opt.onEntry) {
opt.onEntry({ opt.onEntry({
path: filePath, path: join(dir, file),
size: stats.size, size: stats.size,
}); });
} }
@@ -177,18 +177,19 @@ export default class FileSystem extends StorageBase {
async extractZip(repoId: string, p: string, data: Readable): Promise<void> { async extractZip(repoId: string, p: string, data: Readable): Promise<void> {
const pipe = promisify(pipeline); const pipe = promisify(pipeline);
const fullPath = join(config.FOLDER, this.repoPath(repoId), p); const fullPath = join(config.FOLDER, this.repoPath(repoId), p);
return pipe( const extractor = Extract({
data, path: fullPath,
Extract({ decodeString: (buf) => {
path: fullPath, const name = buf.toString();
decodeString: (buf) => { const newName = name.substr(name.indexOf("/") + 1);
const name = buf.toString(); if (newName == "") {
const newName = name.substr(name.indexOf("/") + 1); return "___IGNORE___";
if (newName == "") return "/dev/null"; }
return newName; return newName;
}, },
}) });
); await pipe(data, extractor);
await this.rm(repoId, join(p, "___IGNORE___"));
} }
/** @override */ /** @override */
+6
View File
@@ -31,6 +31,12 @@ router.get(
if (!repo) return; if (!repo) return;
try { try {
if (!(await repo.isReady())) {
throw new AnonymousError("repository_not_ready", {
object: this,
httpStatus: 503,
});
}
const f = new AnonymizedFile({ const f = new AnonymizedFile({
repository: repo, repository: repo,
anonymizedPath, anonymizedPath,
+7 -1
View File
@@ -3,6 +3,8 @@ import GitHubStream from "../core/source/GitHubStream";
import { AnonymizeTransformer, isTextFile } from "../core/anonymize-utils"; import { AnonymizeTransformer, isTextFile } from "../core/anonymize-utils";
import { handleError } from "../server/routes/route-utils"; import { handleError } from "../server/routes/route-utils";
import { contentType } from "mime-types"; import { contentType } from "mime-types";
import storage from "../core/storage";
import AnonymizedFile from "../core/AnonymizedFile";
export const router = express.Router(); export const router = express.Router();
@@ -25,7 +27,11 @@ router.post("/", async (req: express.Request, res: express.Response) => {
commit: commit, commit: commit,
getToken: () => token, getToken: () => token,
}); });
const content = source.downloadFile(token, fileSha); const content = await source.getFileContentCache(
filePath,
repoId,
() => fileSha
);
try { try {
const mime = contentType(filePath); const mime = contentType(filePath);
if (mime && !filePath.endsWith(".ts")) { if (mime && !filePath.endsWith(".ts")) {