mirror of
https://github.com/tdurieux/anonymous_github.git
synced 2026-02-12 18:32:44 +00:00
fix: fix GitHubDownload
This commit is contained in:
@@ -11,6 +11,7 @@ import { anonymizePath, isTextFile } from "./anonymize-utils";
|
||||
import AnonymousError from "./AnonymousError";
|
||||
import { handleError } from "../server/routes/route-utils";
|
||||
import got from "got";
|
||||
import storage from "./storage";
|
||||
|
||||
/**
|
||||
* Represents a file in an anonymized repository
|
||||
|
||||
@@ -19,6 +19,7 @@ import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymized
|
||||
import { GitHubRepository } from "./source/GitHubRepository";
|
||||
import { trace } from "@opentelemetry/api";
|
||||
import { getToken } from "./GitHubUtils";
|
||||
import { FILE_TYPE } from "./storage/Storage";
|
||||
|
||||
function anonymizeTreeRecursive(
|
||||
tree: TreeElement,
|
||||
@@ -219,6 +220,23 @@ export default class Repository {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Tell whether the repository can be served right now.
 *
 * A repository whose status is not READY is never ready. For a
 * "GitHubDownload" source the stored copy is checked as well: when storage
 * reports nothing under this repoId, the state is reset to PREPARING, a
 * download job is queued, and the repository is reported as not ready.
 *
 * @returns true when the repository can be served, false otherwise
 */
async isReady(): Promise<boolean> {
  if (this.status !== RepositoryStatus.READY) return false;

  // Only downloaded repositories depend on a local copy being present.
  if (this.source.type !== "GitHubDownload") return true;

  const stored = await storage.exists(this.repoId);
  if (stored !== FILE_TYPE.NOT_FOUND) return true;

  // The local copy is gone: go back to PREPARING and schedule a new download.
  await this.resetSate(RepositoryStatus.PREPARING);

  // NOTE(review): the repoId is reused as the job id, presumably so the queue
  // can de-duplicate repeated requests for the same repository — confirm.
  await downloadQueue.add(this.repoId, this, {
    jobId: this.repoId,
    attempts: 3,
  });
  return false;
}
|
||||
|
||||
/**
|
||||
* Update the repository if a new commit exists
|
||||
*
|
||||
|
||||
@@ -90,9 +90,9 @@ export default class GitHubDownload extends GitHubBase {
|
||||
const span = trace
|
||||
.getTracer("ano-file")
|
||||
.startSpan("GHDownload.getFileContent");
|
||||
span.setAttribute("repoId", file.repository.repoId);
|
||||
span.setAttribute("repoId", this.data.repoId);
|
||||
try {
|
||||
const exists = await storage.exists(file.filePath);
|
||||
const exists = await storage.exists(this.data.repoId, file.filePath);
|
||||
if (exists === FILE_TYPE.FILE) {
|
||||
return storage.read(this.data.repoId, file.filePath);
|
||||
} else if (exists === FILE_TYPE.FOLDER) {
|
||||
|
||||
@@ -19,7 +19,7 @@ export default class GitHubStream extends GitHubBase {
|
||||
super(data);
|
||||
}
|
||||
|
||||
downloadFile(token: string, sha: string) {
|
||||
downloadFile(token: string, sha: string) {
|
||||
const span = trace.getTracer("ano-file").startSpan("GHStream.downloadFile");
|
||||
span.setAttribute("sha", sha);
|
||||
const oct = octokit(token);
|
||||
@@ -49,6 +49,53 @@ export default class GitHubStream extends GitHubBase {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Return the content of a file, served from storage when it is already
 * cached, otherwise downloaded through `downloadFile` and written to storage
 * while it is being returned.
 *
 * @param filePath path of the file; used as the storage key within the repository
 * @param repoId id of the repository the file belongs to
 * @param fileSha callback giving the sha to download; only invoked on a cache miss
 * @returns the cached content from storage, or a pass-through stream of the download
 * @throws AnonymousError "folder_not_supported" (400) when storage reports a folder at filePath
 */
async getFileContentCache(
  filePath: string,
  repoId: string,
  fileSha: () => Promise<string> | string
) {
  const span = trace
    .getTracer("ano-file")
    .startSpan("GHStream.getFileContent");
  span.setAttribute("repoId", repoId);
  span.setAttribute("file", filePath);

  // Set once the download stream's "close" handler is responsible for ending
  // the span. Until then every exit (cache hit, thrown error) must end it
  // here, otherwise the span is never closed.
  let spanOwnedByStream = false;
  try {
    const fileInfo = await storage.exists(repoId, filePath);
    if (fileInfo === FILE_TYPE.FILE) {
      return storage.read(repoId, filePath);
    }
    if (fileInfo === FILE_TYPE.FOLDER) {
      throw new AnonymousError("folder_not_supported", {
        httpStatus: 400,
        object: filePath,
      });
    }

    const content = this.downloadFile(
      await this.data.getToken(),
      await fileSha()
    );

    content.on("close", () => {
      span.end();
    });
    spanOwnedByStream = true;

    // duplicate the stream: one copy is written to storage, the other is returned
    const stream1 = content.pipe(new stream.PassThrough());
    const stream2 = content.pipe(new stream.PassThrough());

    // pipe() does not forward errors, so relay them to both copies by hand
    content.on("error", (error) => {
      const wrapped = new AnonymousError("file_not_found", {
        httpStatus: (error as any).status || (error as any).httpStatus,
        cause: error as Error,
        object: filePath,
      });
      stream1.emit("error", wrapped);
      stream2.emit("error", wrapped);
    });

    // NOTE(review): the result of write() is not awaited; if it returns a
    // promise, a rejection would go unhandled — confirm against Storage.write.
    storage.write(repoId, filePath, stream1, this.type);
    return stream2;
  } finally {
    if (!spanOwnedByStream) {
      span.end();
    }
  }
}
|
||||
|
||||
async getFileContent(file: AnonymizedFile): Promise<stream.Readable> {
|
||||
const span = trace
|
||||
.getTracer("ano-file")
|
||||
@@ -62,44 +109,20 @@ export default class GitHubStream extends GitHubBase {
|
||||
// compute the original path if ambiguous
|
||||
await file.originalPath();
|
||||
}
|
||||
const fileInfo = await storage.exists(
|
||||
return this.getFileContentCache(
|
||||
file.filePath,
|
||||
file.repository.repoId,
|
||||
file.filePath
|
||||
async () => {
|
||||
const fileSha = await file.sha();
|
||||
if (!fileSha) {
|
||||
throw new AnonymousError("file_not_accessible", {
|
||||
httpStatus: 404,
|
||||
object: file,
|
||||
});
|
||||
}
|
||||
return fileSha;
|
||||
}
|
||||
);
|
||||
if (fileInfo == FILE_TYPE.FILE) {
|
||||
return storage.read(file.repository.repoId, file.filePath);
|
||||
} else if (fileInfo == FILE_TYPE.FOLDER) {
|
||||
throw new AnonymousError("folder_not_supported", {
|
||||
httpStatus: 400,
|
||||
object: file,
|
||||
});
|
||||
}
|
||||
span.setAttribute("path", file.filePath);
|
||||
const file_sha = await file.sha();
|
||||
if (!file_sha) {
|
||||
throw new AnonymousError("file_not_accessible", {
|
||||
httpStatus: 404,
|
||||
object: file,
|
||||
});
|
||||
}
|
||||
const content = this.downloadFile(await this.data.getToken(), file_sha);
|
||||
|
||||
// duplicate the stream to write it to the storage
|
||||
const stream1 = content.pipe(new stream.PassThrough());
|
||||
const stream2 = content.pipe(new stream.PassThrough());
|
||||
|
||||
content.on("error", (error) => {
|
||||
error = new AnonymousError("file_not_found", {
|
||||
httpStatus: (error as any).status || (error as any).httpStatus,
|
||||
cause: error as Error,
|
||||
object: file,
|
||||
});
|
||||
stream1.emit("error", error);
|
||||
stream2.emit("error", error);
|
||||
});
|
||||
|
||||
storage.write(file.repository.repoId, file.filePath, stream1, this.type);
|
||||
return stream2;
|
||||
} finally {
|
||||
span.end();
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { Tree } from "../types";
|
||||
import config from "../../config";
|
||||
import * as fs from "fs";
|
||||
import { Extract } from "unzip-stream";
|
||||
import { Extractq } from "unzip-stream";
|
||||
import { join, basename, dirname } from "path";
|
||||
import { Response } from "express";
|
||||
import { Readable, pipeline, Transform } from "stream";
|
||||
@@ -147,18 +147,18 @@ export default class FileSystem extends StorageBase {
|
||||
let files = await fs.promises.readdir(fullPath);
|
||||
const output: Tree = {};
|
||||
for (let file of files) {
|
||||
let filePath = join(dir, file);
|
||||
let filePath = join(fullPath, file);
|
||||
try {
|
||||
const stats = await fs.promises.stat(join(fullPath, filePath));
|
||||
const stats = await fs.promises.stat(filePath);
|
||||
if (file[0] == "$") {
|
||||
file = "\\" + file;
|
||||
}
|
||||
if (stats.isDirectory()) {
|
||||
output[file] = await this.listFiles(repoId, filePath, opt);
|
||||
output[file] = await this.listFiles(repoId, join(dir, file), opt);
|
||||
} else if (stats.isFile()) {
|
||||
if (opt.onEntry) {
|
||||
opt.onEntry({
|
||||
path: filePath,
|
||||
path: join(dir, file),
|
||||
size: stats.size,
|
||||
});
|
||||
}
|
||||
@@ -177,18 +177,19 @@ export default class FileSystem extends StorageBase {
|
||||
async extractZip(repoId: string, p: string, data: Readable): Promise<void> {
|
||||
const pipe = promisify(pipeline);
|
||||
const fullPath = join(config.FOLDER, this.repoPath(repoId), p);
|
||||
return pipe(
|
||||
data,
|
||||
Extract({
|
||||
path: fullPath,
|
||||
decodeString: (buf) => {
|
||||
const name = buf.toString();
|
||||
const newName = name.substr(name.indexOf("/") + 1);
|
||||
if (newName == "") return "/dev/null";
|
||||
return newName;
|
||||
},
|
||||
})
|
||||
);
|
||||
const extractor = Extract({
|
||||
path: fullPath,
|
||||
decodeString: (buf) => {
|
||||
const name = buf.toString();
|
||||
const newName = name.substr(name.indexOf("/") + 1);
|
||||
if (newName == "") {
|
||||
return "___IGNORE___";
|
||||
}
|
||||
return newName;
|
||||
},
|
||||
});
|
||||
await pipe(data, extractor);
|
||||
await this.rm(repoId, join(p, "___IGNORE___"));
|
||||
}
|
||||
|
||||
/** @override */
|
||||
|
||||
@@ -31,6 +31,12 @@ router.get(
|
||||
if (!repo) return;
|
||||
|
||||
try {
|
||||
if (!(await repo.isReady())) {
|
||||
throw new AnonymousError("repository_not_ready", {
|
||||
object: this,
|
||||
httpStatus: 503,
|
||||
});
|
||||
}
|
||||
const f = new AnonymizedFile({
|
||||
repository: repo,
|
||||
anonymizedPath,
|
||||
|
||||
@@ -3,6 +3,8 @@ import GitHubStream from "../core/source/GitHubStream";
|
||||
import { AnonymizeTransformer, isTextFile } from "../core/anonymize-utils";
|
||||
import { handleError } from "../server/routes/route-utils";
|
||||
import { contentType } from "mime-types";
|
||||
import storage from "../core/storage";
|
||||
import AnonymizedFile from "../core/AnonymizedFile";
|
||||
|
||||
export const router = express.Router();
|
||||
|
||||
@@ -25,7 +27,11 @@ router.post("/", async (req: express.Request, res: express.Response) => {
|
||||
commit: commit,
|
||||
getToken: () => token,
|
||||
});
|
||||
const content = source.downloadFile(token, fileSha);
|
||||
const content = await source.getFileContentCache(
|
||||
filePath,
|
||||
repoId,
|
||||
() => fileSha
|
||||
);
|
||||
try {
|
||||
const mime = contentType(filePath);
|
||||
if (mime && !filePath.endsWith(".ts")) {
|
||||
|
||||
Reference in New Issue
Block a user