feat: improve download of anonymized repositories

This commit is contained in:
tdurieux
2024-05-06 11:52:32 +02:00
parent 93606a5c39
commit dcf483ea03
5 changed files with 145 additions and 40 deletions

View File

@@ -289,14 +289,9 @@ export default class AnonymizedFile {
this.sha(),
this.repository.getToken(),
]);
// const hostName = new URL(config.STREAMER_ENTRYPOINT).hostname;
// const ipHost = await this.cacheableLookup.lookupAsync(hostName);
got
const resStream = got
.stream(join(config.STREAMER_ENTRYPOINT, "api"), {
method: "POST",
// lookup: this.cacheableLookup.lookup,
// host: ipHost.address,
// dnsCache: this.cacheableLookup,
json: {
sha,
token,
@@ -308,7 +303,8 @@ export default class AnonymizedFile {
anonymizerOptions: anonymizer.opt,
},
})
.on("error", () => {
.on("error", (err) => {
span.recordException(err);
handleError(
new AnonymousError("file_not_found", {
object: this,
@@ -316,12 +312,17 @@ export default class AnonymizedFile {
}),
res
);
})
.pipe(res)
.on("close", () => {
span.end();
resolve();
});
resStream.pipe(res);
res.on("close", () => {
span.end();
resolve();
});
res.on("error", (err) => {
reject(err);
span.recordException(err);
span.end();
});
return;
}

View File

@@ -16,7 +16,7 @@ export default class GitHubDownload extends GitHubBase {
super(data);
}
private async _getZipUrl(): Promise<OctokitResponse<unknown, 302>> {
public async getZipUrl(): Promise<OctokitResponse<unknown, 302>> {
const oct = octokit(await this.data.getToken());
return oct.rest.repos.downloadZipballArchive({
owner: this.data.organization,
@@ -32,11 +32,11 @@ export default class GitHubDownload extends GitHubBase {
try {
let response: OctokitResponse<unknown, number>;
try {
response = await this._getZipUrl();
response = await this.getZipUrl();
} catch (error) {
span.recordException(error as Error);
throw new AnonymousError("repo_not_accessible", {
httpStatus: 404,
throw new AnonymousError("repo_not_found", {
httpStatus: (error as any).status || 404,
object: this.data,
cause: error as Error,
});

View File

@@ -194,7 +194,8 @@ export default class GitHubStream extends GitHubBase {
});
output.push(...this.tree2Tree(data.tree, parentPath));
} catch (error) {
if ((error as any).status == 404) {
console.log(error);
if ((error as any).status == 409 || (error as any).status == 404) {
// empty repo
data = { tree: [] };
} else {

View File

@@ -2,6 +2,8 @@ import { promisify } from "util";
import * as express from "express";
import * as stream from "stream";
import config from "../../config";
import got from "got";
import { join } from "path";
import { getRepo, getUser, handleError } from "./route-utils";
import AnonymousError from "../../core/AnonymousError";
@@ -26,17 +28,15 @@ router.get(
const repo = await getRepo(req, res);
if (!repo) return;
let user: User | undefined = undefined;
try {
user = await getUser(req);
} catch (_) {}
let download = false;
const conference = await repo.conference();
if (conference) {
download =
conference.quota.size > -1 &&
!!config.ENABLE_DOWNLOAD &&
repo.source.type == "GitHubDownload";
}
if (
repo.size.storage < config.FREE_DOWNLOAD_REPO_SIZE * 1024 &&
repo.source.type == "GitHubDownload"
(!!config.ENABLE_DOWNLOAD && !!config.STREAMER_ENTRYPOINT) ||
user?.isAdmin === true
) {
download = true;
}
@@ -48,6 +48,44 @@ router.get(
});
}
await repo.countView();
if (config.STREAMER_ENTRYPOINT) {
// use the streamer service
const token = await repo.getToken();
const anonymizer = repo.generateAnonymizeTransformer("");
res.attachment(`${repo.repoId}.zip`);
const reqStream = got
.stream(join(config.STREAMER_ENTRYPOINT, "api/download"), {
method: "POST",
json: {
token,
repoFullName: repo.model.source.repositoryName,
commit: repo.model.source.commit,
branch: repo.model.source.branch,
repoId: repo.repoId,
anonymizerOptions: anonymizer.opt,
},
})
.on("error", () => {
handleError(
new AnonymousError("file_not_found", {
object: this,
httpStatus: 404,
}),
res
);
});
reqStream.pipe(res);
res.on("close", () => {
reqStream.destroy();
});
res.on("error", () => {
reqStream.destroy();
});
return;
}
res.attachment(`${repo.repoId}.zip`);
// cache the file for 6 hours
@@ -125,7 +163,7 @@ router.get(
throw new AnonymousError(
repo.model.statusMessage
? repo.model.statusMessage
: "repository_not_available",
: "repository_not_accessible",
{
object: repo,
httpStatus: 500,
@@ -142,17 +180,7 @@ router.get(
}
let download = false;
const conference = await repo.conference();
if (conference) {
download =
conference.quota.size > -1 &&
!!config.ENABLE_DOWNLOAD &&
repo.source.type == "GitHubDownload";
}
if (
repo.size.storage < config.FREE_DOWNLOAD_REPO_SIZE * 1024 &&
repo.source.type == "GitHubDownload"
) {
if (!!config.ENABLE_DOWNLOAD && !!config.STREAMER_ENTRYPOINT) {
download = true;
}
@@ -162,7 +190,7 @@ router.get(
} catch (_) {}
res.json({
url: redirectURL,
download,
download: download || user?.isAdmin === true,
lastUpdateDate: repo.model.source.commitDate
? repo.model.source.commitDate
: repo.model.anonymizeDate,

View File

@@ -1,11 +1,86 @@
import { promisify } from "util";
import * as stream from "stream";
import * as express from "express";
import GitHubStream from "../core/source/GitHubStream";
import { AnonymizeTransformer, isTextFile } from "../core/anonymize-utils";
import {
anonymizePath,
AnonymizeTransformer,
isTextFile,
} from "../core/anonymize-utils";
import { handleError } from "../server/routes/route-utils";
import { lookup } from "mime-types";
import GitHubDownload from "../core/source/GitHubDownload";
import got from "got";
import { Parse } from "unzip-stream";
import archiver = require("archiver");
export const router = express.Router();
/**
 * POST /download — streams a zip archive of a GitHub repository in which
 * every file entry has been piped through an `AnonymizeTransformer`.
 *
 * Expected JSON body fields (all read from `req.body`):
 *  - `token`: GitHub token handed to `GitHubDownload` through `getToken`.
 *  - `repoFullName`: "owner/name"; split on "/" into organization and repo name.
 *  - `repoId`, `commit`: forwarded to `GitHubDownload`.
 *  - `anonymizerOptions`: options forwarded to `AnonymizeTransformer`; its
 *    `terms` list (if any) is also used to anonymize entry paths.
 *
 * Failures that happen before any byte of the archive has been sent are
 * reported through `handleError`; once the response has started, the
 * connection is destroyed so the client does not mistake a broken archive
 * for a complete one.
 */
router.post(
  "/download",
  async (req: express.Request, res: express.Response) => {
    try {
      // Parse the body inside the try block: a malformed request must be
      // answered through handleError instead of rejecting the async handler.
      const body = req.body || {};
      const token: string = body.token;
      if (typeof body.repoFullName !== "string") {
        throw new Error("repoFullName_not_specified");
      }
      const repoFullName = body.repoFullName.split("/");
      const repoId = body.repoId;
      const commit = body.commit;
      const anonymizerOptions = body.anonymizerOptions;
      if (anonymizerOptions == null || typeof anonymizerOptions !== "object") {
        // Without options every entry would fail and be drained, which
        // silently yields an empty archive; fail fast instead.
        throw new Error("anonymizerOptions_not_specified");
      }

      const source = new GitHubDownload({
        repoId,
        organization: repoFullName[0],
        repoName: repoFullName[1],
        commit: commit,
        getToken: () => token,
      });
      const response = await source.getZipUrl();
      const downloadStream = got.stream(response.url);
      const archive = archiver("zip", {});

      let failed = false;
      // Tears the pipeline down after a fatal download/archive error.
      const fail = (error: Error) => {
        if (failed) return;
        failed = true;
        console.error(error);
        // Detach first so aborting the archive cannot end `res` with a
        // seemingly successful (but empty or truncated) body.
        archive.unpipe(res);
        if (res.headersSent) {
          res.destroy();
        } else {
          handleError(error, res);
        }
        downloadStream.destroy();
        archive.abort();
      };

      // `pipe` does not forward errors, and an 'error' event without a
      // listener is thrown by the emitter, so both sources need a handler.
      downloadStream.on("error", fail);
      archive.on("error", fail);

      res.on("error", (error) => {
        console.error(error);
        downloadStream.destroy();
      });
      res.on("close", () => {
        downloadStream.destroy();
        if (!res.writableEnded) {
          // The client went away before the archive was complete.
          archive.abort();
        }
      });

      downloadStream
        .pipe(Parse())
        .on("entry", (entry) => {
          if (entry.type === "File") {
            try {
              // Drop the first path segment (presumably the top-level folder
              // GitHub adds to its zipballs) before anonymizing the path.
              const fileName = anonymizePath(
                entry.path.substring(entry.path.indexOf("/") + 1),
                anonymizerOptions.terms || []
              );
              // Give each transformer its own options object so that one
              // entry's filePath cannot leak into another entry's transformer.
              // NOTE(review): assumes AnonymizeTransformer reads `filePath`
              // from the options it is constructed with — confirm.
              const anonymizer = new AnonymizeTransformer({
                ...anonymizerOptions,
                filePath: fileName,
              });
              const st = entry.pipe(anonymizer);
              archive.append(st, { name: fileName });
            } catch (error) {
              entry.autodrain();
              console.error(error);
            }
          } else {
            // Non-file entries are not archived; drain them so the parser
            // can move on.
            entry.autodrain();
          }
        })
        .on("error", (error) => {
          // Best effort: keep whatever was archived before the parser failed.
          console.error(error);
          archive.finalize();
        })
        .on("finish", () => {
          archive.finalize();
        });
      archive.pipe(res);
    } catch (error) {
      handleError(error, res);
    }
  }
);
router.post("/", async (req: express.Request, res: express.Response) => {
req.body = req.body || {};
const token: string = req.body.token;