Fix 9 bugs and add 103 tests for core anonymization, config, and routing (#669)

This commit is contained in:
Thomas Durieux
2026-04-15 09:41:00 +02:00
committed by GitHub
parent 261eaa8d79
commit 188066e91d
23 changed files with 2630 additions and 39 deletions
+15 -3
View File
@@ -43,7 +43,7 @@ const config: Config = {
GITHUB_TOKEN: "",
DEFAULT_QUOTA: 2 * 1024 * 1024 * 1024 * 8,
MAX_FILE_FOLDER: 1000,
MAX_FILE_SIZE: 100 * 1024 * 1024, // in b, 10MB
MAX_FILE_SIZE: 100 * 1024 * 1024, // in b, 100MB
MAX_REPO_SIZE: 60000, // in kb, 60MB
AUTO_DOWNLOAD_REPO_SIZE: 150, // in kb, 150kb
FREE_DOWNLOAD_REPO_SIZE: 150, // in kb, 150kb
@@ -80,8 +80,20 @@ const config: Config = {
};
for (const conf in process.env) {
if ((config as unknown as Record<string, unknown>)[conf] !== undefined) {
(config as unknown as Record<string, string | undefined>)[conf] = process.env[conf];
const configRecord = config as unknown as Record<string, unknown>;
if (configRecord[conf] !== undefined) {
const currentValue = configRecord[conf];
const envValue = process.env[conf] as string;
if (typeof currentValue === "number") {
const parsed = Number(envValue);
if (!isNaN(parsed)) {
configRecord[conf] = parsed;
}
} else if (typeof currentValue === "boolean") {
configRecord[conf] = envValue === "true" || envValue === "1";
} else {
configRecord[conf] = envValue;
}
}
}
+1 -1
View File
@@ -5,7 +5,7 @@ import { ConferenceStatus } from "./types";
export default class Conference {
private _data: IConferenceDocument;
private _repositories: Repository[] = [];
private _repositories: Repository[] | null = null;
constructor(data: IConferenceDocument) {
this._data = data;
+2 -2
View File
@@ -98,14 +98,14 @@ export default class PullRequest {
/**
* Check the status of the pullRequest
*/
check() {
async check() {
if (
this._model.options.expirationMode !== "never" &&
this.status == "ready" &&
this._model.options.expirationDate
) {
if (this._model.options.expirationDate <= new Date()) {
this.expire();
await this.expire();
}
}
if (
+18 -16
View File
@@ -60,7 +60,6 @@ export default class Repository {
constructor(data: IAnonymizedRepositoryDocument) {
this._model = data;
this.owner = new User(new UserModel({ _id: data.owner }));
this.owner = new User(new UserModel({ _id: data.owner }));
this.owner.model.isNew = false;
}
@@ -169,11 +168,14 @@ export default class Repository {
opt.path = await f.originalPath();
}
let pathQuery: string | RegExp | undefined = opt.path
? new RegExp(`^${opt.path}`)
const escapedPath = opt.path
? opt.path.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&")
: undefined;
let pathQuery: string | RegExp | undefined = escapedPath
? new RegExp(`^${escapedPath}`)
: undefined;
if (opt.recursive === false) {
pathQuery = opt.path ? new RegExp(`^${opt.path}$`) : "";
pathQuery = escapedPath ? new RegExp(`^${escapedPath}$`) : "";
}
const query: FilterQuery<IFile> = {
@@ -328,6 +330,18 @@ export default class Repository {
this.model.source.branch || ghRepo.model.defaultBranch;
const newCommit = branches.filter((f) => f.name == branchName)[0]
?.commit;
if (!newCommit) {
console.error(
`${branchName} for ${this.model.source.repositoryName} is not found`
);
await this.updateStatus(RepositoryStatus.ERROR, "branch_not_found");
await this.resetSate();
throw new AnonymousError("branch_not_found", {
object: this,
httpStatus: 404,
});
}
if (
this.model.source.commit == newCommit &&
this.status == RepositoryStatus.READY
@@ -348,18 +362,6 @@ export default class Repository {
this._model.source.commitDate = new Date(d);
}
this.model.source.commit = newCommit;
if (!newCommit) {
console.error(
`${branchName} for ${this.model.source.repositoryName} is not found`
);
await this.updateStatus(RepositoryStatus.ERROR, "branch_not_found");
await this.resetSate();
throw new AnonymousError("branch_not_found", {
object: this,
httpStatus: 404,
});
}
this._model.anonymizeDate = new Date();
console.log(
`[UPDATE] ${this._model.repoId} will be updated to ${newCommit}`
-2
View File
@@ -102,8 +102,6 @@ export default class GitHubDownload extends GitHubBase {
function humanFileSize(bytes: number, si = false, dp = 1) {
const thresh = si ? 1000 : 1024;
bytes = bytes / 8;
if (Math.abs(bytes) < thresh) {
return bytes + "B";
}
+3 -2
View File
@@ -70,8 +70,9 @@ export default class FileSystem extends StorageBase {
});
}
return await fs.promises.writeFile(fullPath, data, "utf-8");
} catch {
// write error ignored
} catch (err) {
console.error("[ERROR] FileSystem.write failed:", err);
throw err;
}
}
+10 -5
View File
@@ -129,10 +129,10 @@ router.get("/repos", async (req, res) => {
}
const query = [];
if (req.query.search) {
query.push({ repoId: { $regex: req.query.search } });
const escaped = (req.query.search as string).replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
query.push({ repoId: { $regex: escaped } });
}
const status: { status: string }[] = [];
query.push({ $or: status });
if (ready) {
status.push({ status: "ready" });
}
@@ -151,6 +151,9 @@ router.get("/repos", async (req, res) => {
status.push({ status: "preparing" });
status.push({ status: "download" });
}
if (status.length > 0) {
query.push({ $or: status });
}
const skipIndex = (page - 1) * limit;
const [total, results] = await Promise.all([
AnonymizedRepositoryModel.find({
@@ -199,7 +202,8 @@ router.get("/users", async (req, res) => {
}
let query = {};
if (req.query.search) {
query = { username: { $regex: req.query.search } };
const escaped = (req.query.search as string).replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
query = { username: { $regex: escaped } };
}
res.json({
@@ -270,10 +274,11 @@ router.get("/conferences", async (req, res) => {
}
let query = {};
if (req.query.search) {
const escaped = (req.query.search as string).replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
query = {
$or: [
{ name: { $regex: req.query.search } },
{ conferenceID: { $regex: req.query.search } },
{ name: { $regex: escaped } },
{ conferenceID: { $regex: escaped } },
],
};
}
+1 -1
View File
@@ -32,7 +32,7 @@ router.get(
try {
if (!(await repo.isReady())) {
throw new AnonymousError("repository_not_ready", {
object: this,
object: repo,
httpStatus: 503,
});
}
+1 -1
View File
@@ -70,7 +70,7 @@ router.get(
.on("error", () => {
handleError(
new AnonymousError("file_not_found", {
object: this,
object: req.params.repoId,
httpStatus: 404,
}),
res
+4 -2
View File
@@ -20,12 +20,12 @@ export async function getPullRequest(
pullRequest.options.expirationMode == "redirect"
) {
res.redirect(
`http://github.com/${pullRequest.source.repositoryFullName}/pull/${pullRequest.source.pullRequestId}`
`https://github.com/${pullRequest.source.repositoryFullName}/pull/${pullRequest.source.pullRequestId}`
);
return null;
}
pullRequest.check();
await pullRequest.check();
}
return pullRequest;
} catch (error) {
@@ -105,6 +105,8 @@ export function handleError(
let errorCode = error;
if (error instanceof Error) {
errorCode = error.message;
} else if (typeof error !== "string") {
errorCode = String(error);
}
let status = 500;
if (error.httpStatus) {
+13 -4
View File
@@ -8,6 +8,15 @@ import * as sanitizeHtml from "sanitize-html";
import { streamToString } from "../../core/anonymize-utils";
import { IFile } from "../../core/model/files/files.types";
/**
 * Escape the characters that are significant in HTML markup so the given
 * text can be embedded in element content or a quoted attribute value.
 *
 * Replaces `&`, `<`, `>`, `"` and `'` with their entity equivalents; every
 * other character is returned unchanged.
 *
 * @param str - raw text to embed in an HTML document
 * @returns the text with the five special characters replaced by entities
 */
function escapeHtml(str: string): string {
  const entities: Record<string, string> = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#039;",
  };
  // Single pass over the input: each special character is looked up once, so
  // the ampersands introduced by the entities themselves are never re-escaped.
  return str.replace(/[&<>"']/g, (ch) => entities[ch] ?? ch);
}
const sanitizeOptions: sanitizeHtml.IOptions = {
allowedTags: sanitizeHtml.defaults.allowedTags.concat([
"img",
@@ -120,12 +129,12 @@ async function webView(req: express.Request, res: express.Response) {
});
} else {
// print list of files in the root repository
const body = `<div class="container p-3"><h2>Content of ${filePath}</h2><div class="list-group">${candidates
const body = `<div class="container p-3"><h2>Content of ${escapeHtml(filePath)}</h2><div class="list-group">${candidates
.map(
(c) =>
`<a class="list-group-item list-group-item-action" href="${
c.name + (c.size == null ? "/" : "")
}">${c.name + (c.size == null ? "/" : "")}</a>`
encodeURI(c.name) + (c.size == null ? "/" : "")
}">${escapeHtml(c.name) + (c.size == null ? "/" : "")}</a>`
)
.join("")}</div></div>`;
const html = `<!DOCTYPE html><html><head><title>Content</title></head><link rel="stylesheet" href="/css/all.min.css" /><body>${body}</body></html>`;
@@ -142,7 +151,7 @@ async function webView(req: express.Request, res: express.Response) {
if (f.extension() == "md") {
const content = await streamToString(await f.anonymizedContent());
const body = sanitizeHtml(marked.marked(content, { headerIds: false, mangle: false }), sanitizeOptions);
const html = `<!DOCTYPE html><html><head><title>Content</title></head><link rel="stylesheet" href="/css/all.min.css" /><body><div class="container p-3 file-content markdown-body">${body}<div></body></html>`;
const html = `<!DOCTYPE html><html><head><title>Content</title></head><link rel="stylesheet" href="/css/all.min.css" /><body><div class="container p-3 file-content markdown-body">${body}</div></body></html>`;
res.contentType("text/html").send(html);
} else {
f.send(res);