fix: fix GitHubDownload

This commit is contained in:
tdurieux
2024-04-03 13:24:34 +01:00
parent fc469be61b
commit db67f53b2c
7 changed files with 112 additions and 57 deletions

View File

@@ -11,6 +11,7 @@ import { anonymizePath, isTextFile } from "./anonymize-utils";
import AnonymousError from "./AnonymousError";
import { handleError } from "../server/routes/route-utils";
import got from "got";
import storage from "./storage";
/**
* Represent a file in an anonymized repository

View File

@@ -19,6 +19,7 @@ import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymized
import { GitHubRepository } from "./source/GitHubRepository";
import { trace } from "@opentelemetry/api";
import { getToken } from "./GitHubUtils";
import { FILE_TYPE } from "./storage/Storage";
function anonymizeTreeRecursive(
tree: TreeElement,
@@ -219,6 +220,23 @@ export default class Repository {
});
}
/**
 * Tell whether the repository can currently be served.
 *
 * Returns false when the status is not READY. For a "GitHubDownload" source
 * whose repoId is reported as NOT_FOUND by the storage, the state is reset to
 * PREPARING, a download job is queued (up to 3 attempts) and false is
 * returned. In every other case the repository is considered ready.
 */
async isReady() {
  if (this.status !== RepositoryStatus.READY) {
    return false;
  }

  // only downloaded repositories need their files to be present in storage
  const isDownloadSource = this.source.type == "GitHubDownload";
  if (!isDownloadSource) {
    return true;
  }

  const stored = await storage.exists(this.repoId);
  if (stored != FILE_TYPE.NOT_FOUND) {
    return true;
  }

  // the files are missing: schedule a new download and report "not ready"
  await this.resetSate(RepositoryStatus.PREPARING);
  const jobOptions = {
    jobId: this.repoId,
    attempts: 3,
  };
  await downloadQueue.add(this.repoId, this, jobOptions);
  return false;
}
/**
* Update the repository if a new commit exists
*

View File

@@ -90,9 +90,9 @@ export default class GitHubDownload extends GitHubBase {
const span = trace
.getTracer("ano-file")
.startSpan("GHDownload.getFileContent");
span.setAttribute("repoId", file.repository.repoId);
span.setAttribute("repoId", this.data.repoId);
try {
const exists = await storage.exists(file.filePath);
const exists = await storage.exists(this.data.repoId, file.filePath);
if (exists === FILE_TYPE.FILE) {
return storage.read(this.data.repoId, file.filePath);
} else if (exists === FILE_TYPE.FOLDER) {

View File

@@ -19,7 +19,7 @@ export default class GitHubStream extends GitHubBase {
super(data);
}
downloadFile(token: string, sha: string) {
downloadFile(token: string, sha: string) {
const span = trace.getTracer("ano-file").startSpan("GHStream.downloadFile");
span.setAttribute("sha", sha);
const oct = octokit(token);
@@ -49,6 +49,53 @@ export default class GitHubStream extends GitHubBase {
}
}
/**
 * Get the content of a file, using the storage as a cache.
 *
 * When the storage already holds the file, the result of `storage.read` is
 * returned. Otherwise the blob is downloaded through `downloadFile`, piped
 * into two PassThrough streams: one is handed to `storage.write`, the other
 * is returned to the caller.
 *
 * @param filePath path of the file, used as the storage path
 * @param repoId id of the repository, used as the storage key
 * @param fileSha callback providing the sha to download; it is only invoked
 *   when the file is not found in the storage
 * @returns the stored content on a cache hit, otherwise a PassThrough stream
 *   fed by the download
 * @throws AnonymousError "folder_not_supported" (HTTP 400) when the storage
 *   reports that the path is a folder
 */
async getFileContentCache(
  filePath: string,
  repoId: string,
  fileSha: () => Promise<string> | string
) {
  const span = trace
    .getTracer("ano-file")
    .startSpan("GHStream.getFileContent");
  span.setAttribute("repoId", repoId);
  span.setAttribute("file", filePath);

  // Once a download has started, the span is ended by the stream's "close"
  // event. On every other exit path (cache hit, folder error, failure while
  // resolving the token or the sha) it has to be ended here, otherwise the
  // span is never closed.
  let spanEndedByStream = false;
  try {
    const fileInfo = await storage.exists(repoId, filePath);
    if (fileInfo == FILE_TYPE.FILE) {
      return storage.read(repoId, filePath);
    } else if (fileInfo == FILE_TYPE.FOLDER) {
      throw new AnonymousError("folder_not_supported", {
        httpStatus: 400,
        object: filePath,
      });
    }

    const content = this.downloadFile(
      await this.data.getToken(),
      await fileSha()
    );
    content.on("close", () => {
      span.end();
    });
    spanEndedByStream = true;

    // duplicate the stream to write it to the storage
    const stream1 = content.pipe(new stream.PassThrough());
    const stream2 = content.pipe(new stream.PassThrough());

    // pipe() does not forward errors: propagate them by hand to both copies
    content.on("error", (error) => {
      const wrapped = new AnonymousError("file_not_found", {
        httpStatus: (error as any).status || (error as any).httpStatus,
        cause: error as Error,
        object: filePath,
      });
      stream1.emit("error", wrapped);
      stream2.emit("error", wrapped);
    });

    storage.write(repoId, filePath, stream1, this.type);
    return stream2;
  } finally {
    if (!spanEndedByStream) {
      span.end();
    }
  }
}
async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {
const span = trace
.getTracer("ano-file")
@@ -62,44 +109,20 @@ export default class GitHubStream extends GitHubBase {
// compute the original path if ambiguous
await file.originalPath();
}
const fileInfo = await storage.exists(
return this.getFileContentCache(
file.filePath,
file.repository.repoId,
file.filePath
async () => {
const fileSha = await file.sha();
if (!fileSha) {
throw new AnonymousError("file_not_accessible", {
httpStatus: 404,
object: file,
});
}
return fileSha;
}
);
if (fileInfo == FILE_TYPE.FILE) {
return storage.read(file.repository.repoId, file.filePath);
} else if (fileInfo == FILE_TYPE.FOLDER) {
throw new AnonymousError("folder_not_supported", {
httpStatus: 400,
object: file,
});
}
span.setAttribute("path", file.filePath);
const file_sha = await file.sha();
if (!file_sha) {
throw new AnonymousError("file_not_accessible", {
httpStatus: 404,
object: file,
});
}
const content = this.downloadFile(await this.data.getToken(), file_sha);
// duplicate the stream to write it to the storage
const stream1 = content.pipe(new stream.PassThrough());
const stream2 = content.pipe(new stream.PassThrough());
content.on("error", (error) => {
error = new AnonymousError("file_not_found", {
httpStatus: (error as any).status || (error as any).httpStatus,
cause: error as Error,
object: file,
});
stream1.emit("error", error);
stream2.emit("error", error);
});
storage.write(file.repository.repoId, file.filePath, stream1, this.type);
return stream2;
} finally {
span.end();
}

View File

@@ -1,7 +1,7 @@
import { Tree } from "../types";
import config from "../../config";
import * as fs from "fs";
import { Extract } from "unzip-stream";
import { Extractq } from "unzip-stream";
import { join, basename, dirname } from "path";
import { Response } from "express";
import { Readable, pipeline, Transform } from "stream";
@@ -147,18 +147,18 @@ export default class FileSystem extends StorageBase {
let files = await fs.promises.readdir(fullPath);
const output: Tree = {};
for (let file of files) {
let filePath = join(dir, file);
let filePath = join(fullPath, file);
try {
const stats = await fs.promises.stat(join(fullPath, filePath));
const stats = await fs.promises.stat(filePath);
if (file[0] == "$") {
file = "\\" + file;
}
if (stats.isDirectory()) {
output[file] = await this.listFiles(repoId, filePath, opt);
output[file] = await this.listFiles(repoId, join(dir, file), opt);
} else if (stats.isFile()) {
if (opt.onEntry) {
opt.onEntry({
path: filePath,
path: join(dir, file),
size: stats.size,
});
}
@@ -177,18 +177,19 @@ export default class FileSystem extends StorageBase {
async extractZip(repoId: string, p: string, data: Readable): Promise<void> {
const pipe = promisify(pipeline);
const fullPath = join(config.FOLDER, this.repoPath(repoId), p);
return pipe(
data,
Extract({
path: fullPath,
decodeString: (buf) => {
const name = buf.toString();
const newName = name.substr(name.indexOf("/") + 1);
if (newName == "") return "/dev/null";
return newName;
},
})
);
const extractor = Extract({
path: fullPath,
decodeString: (buf) => {
const name = buf.toString();
const newName = name.substr(name.indexOf("/") + 1);
if (newName == "") {
return "___IGNORE___";
}
return newName;
},
});
await pipe(data, extractor);
await this.rm(repoId, join(p, "___IGNORE___"));
}
/** @override */

View File

@@ -31,6 +31,12 @@ router.get(
if (!repo) return;
try {
if (!(await repo.isReady())) {
throw new AnonymousError("repository_not_ready", {
object: this,
httpStatus: 503,
});
}
const f = new AnonymizedFile({
repository: repo,
anonymizedPath,

View File

@@ -3,6 +3,8 @@ import GitHubStream from "../core/source/GitHubStream";
import { AnonymizeTransformer, isTextFile } from "../core/anonymize-utils";
import { handleError } from "../server/routes/route-utils";
import { contentType } from "mime-types";
import storage from "../core/storage";
import AnonymizedFile from "../core/AnonymizedFile";
export const router = express.Router();
@@ -25,7 +27,11 @@ router.post("/", async (req: express.Request, res: express.Response) => {
commit: commit,
getToken: () => token,
});
const content = source.downloadFile(token, fileSha);
const content = await source.getFileContentCache(
filePath,
repoId,
() => fileSha
);
try {
const mime = contentType(filePath);
if (mime && !filePath.endsWith(".ts")) {