mirror of
https://github.com/tdurieux/anonymous_github.git
synced 2026-05-16 06:49:09 +02:00
multiple fixes
This commit is contained in:
+12
-2
@@ -22,6 +22,7 @@ import {
|
||||
import { getToken } from "./GitHubUtils";
|
||||
import config from "../config";
|
||||
import FileModel from "./model/files/files.model";
|
||||
import AnonymizedRepositoryModel from "./model/anonymizedRepositories/anonymizedRepositories.model";
|
||||
import { IFile } from "./model/files/files.types";
|
||||
import AnonymizedFile from "./AnonymizedFile";
|
||||
import { FilterQuery } from "mongoose";
|
||||
@@ -351,7 +352,7 @@ export default class Repository {
|
||||
);
|
||||
|
||||
await this.resetSate(RepositoryStatus.PREPARING);
|
||||
await downloadQueue.add(this.repoId, this, {
|
||||
await downloadQueue.add(this.repoId, { repoId: this.repoId }, {
|
||||
jobId: this.repoId,
|
||||
attempts: 3,
|
||||
});
|
||||
@@ -405,7 +406,16 @@ export default class Repository {
|
||||
this._model.statusDate = new Date();
|
||||
this._model.statusMessage = statusMessage;
|
||||
if (!isConnected) return this.model;
|
||||
await this._model.save();
|
||||
await AnonymizedRepositoryModel.updateOne(
|
||||
{ _id: this._model._id },
|
||||
{
|
||||
$set: {
|
||||
status,
|
||||
statusDate: this._model.statusDate,
|
||||
statusMessage,
|
||||
},
|
||||
}
|
||||
).exec();
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
+30
-23
@@ -80,37 +80,44 @@ export default class User {
|
||||
});
|
||||
});
|
||||
|
||||
// find the repositories that are already in the database
|
||||
const finds = (
|
||||
await RepositoryModel.find({
|
||||
externalId: {
|
||||
$in: repositories.map((repo) => repo.externalId),
|
||||
},
|
||||
}).select("externalId")
|
||||
).map((m) => m.externalId);
|
||||
|
||||
// save all the new repositories
|
||||
await Promise.all(
|
||||
repositories
|
||||
.filter((r) => finds.indexOf(r.externalId) == -1)
|
||||
.map((r) => r.save())
|
||||
// find the repositories that are already in the database — fetch both
|
||||
// externalId and id so we can both detect duplicates and reuse the
|
||||
// ids of existing rows without re-querying.
|
||||
const externalIds = repositories.map((repo) => repo.externalId);
|
||||
const existing = await RepositoryModel.find({
|
||||
externalId: { $in: externalIds },
|
||||
}).select("id externalId");
|
||||
const existingByExternalId = new Map(
|
||||
existing.map((m) => [m.externalId, m.id])
|
||||
);
|
||||
|
||||
// save only the ids of the repositories in the user model
|
||||
this._model.repositories = (
|
||||
await RepositoryModel.find({
|
||||
externalId: {
|
||||
$in: repositories.map((repo) => repo.externalId),
|
||||
},
|
||||
}).select("id")
|
||||
).map((m) => m.id);
|
||||
// save all the new repositories
|
||||
const newRepos = repositories.filter(
|
||||
(r) => !existingByExternalId.has(r.externalId)
|
||||
);
|
||||
const saved = await Promise.all(newRepos.map((r) => r.save()));
|
||||
for (const m of saved) {
|
||||
existingByExternalId.set(m.externalId, m.id);
|
||||
}
|
||||
|
||||
// collect ids in the order of the upstream repositories list
|
||||
this._model.repositories = externalIds
|
||||
.map((eid) => existingByExternalId.get(eid))
|
||||
.filter((id) => !!id) as unknown as typeof this._model.repositories;
|
||||
|
||||
// save the model
|
||||
await this._model.save();
|
||||
return repositories.map((r) => new GitHubRepository(r));
|
||||
} else {
|
||||
// Only the fields read by GitHubRepository.toJSON() (and the immediate
|
||||
// callers in user routes). Branches/readme are loaded on demand by
|
||||
// GitHubRepository methods, which issue their own queries.
|
||||
const out = (
|
||||
await RepositoryModel.find({ _id: { $in: this._model.repositories } })
|
||||
await RepositoryModel.find({
|
||||
_id: { $in: this._model.repositories },
|
||||
}).select(
|
||||
"externalId name url size hasPage pageSource defaultBranch"
|
||||
)
|
||||
).map((i) => new GitHubRepository(i));
|
||||
return out;
|
||||
}
|
||||
|
||||
+106
-99
@@ -192,8 +192,62 @@ export class AnonymizeTransformer extends Transform {
|
||||
}
|
||||
}
|
||||
|
||||
// Markdown image pattern hoisted out of removeImage() so we don't recompile
|
||||
// it on every chunk of every file streamed through the anonymizer.
|
||||
const markdownImageRegex =
|
||||
/!\[[^\]]*\]\((?<filename>.*?)(?="|\))(?<optionalpart>".*")?\)/g;
|
||||
|
||||
interface CompiledTermVariant {
|
||||
// Global regex used to replace matches in content (and paths).
|
||||
replaceRegex: RegExp;
|
||||
// Non-global twin used inside the URL callback to test() without
|
||||
// mutating shared lastIndex state.
|
||||
testRegex: RegExp;
|
||||
mask: string;
|
||||
}
|
||||
|
||||
function compileTerms(terms: string[] | undefined): CompiledTermVariant[] {
|
||||
if (!terms || terms.length === 0) return [];
|
||||
const compiled: CompiledTermVariant[] = [];
|
||||
for (let i = 0; i < terms.length; i++) {
|
||||
const spec = terms[i];
|
||||
if (spec.trim() === "") continue;
|
||||
// #285 — entries of the form "term=>replacement" override the default
|
||||
// XXXX-N mask so users can scrub with their preferred token.
|
||||
const parsed = parseTermSpec(spec);
|
||||
let term = parsed.term;
|
||||
const mask =
|
||||
parsed.replacement !== null
|
||||
? parsed.replacement
|
||||
: config.ANONYMIZATION_MASK + "-" + (i + 1);
|
||||
try {
|
||||
new RegExp(term, "gi");
|
||||
} catch {
|
||||
term = term.replace(/[-[\]{}()*+?.,\\^$|#]/g, "\\$&");
|
||||
}
|
||||
for (const variant of termVariants(term)) {
|
||||
const bounded = withWordBoundaries(variant.pattern, {
|
||||
sniffSource: variant.sniff,
|
||||
unicode: variant.unicode,
|
||||
});
|
||||
const baseFlags = variant.unicode ? "iu" : "i";
|
||||
compiled.push({
|
||||
replaceRegex: new RegExp(bounded, "g" + baseFlags),
|
||||
testRegex: new RegExp(bounded, baseFlags),
|
||||
mask,
|
||||
});
|
||||
}
|
||||
}
|
||||
return compiled;
|
||||
}
|
||||
|
||||
export class ContentAnonimizer {
|
||||
public wasAnonymized = false;
|
||||
// Compiled once per instance and reused for every anonymize() call.
|
||||
// Streamed files invoke anonymize() many times per file (one per chunk),
|
||||
// so caching here avoids rebuilding regexes on every chunk.
|
||||
private compiledTerms: CompiledTermVariant[];
|
||||
private selfLinkRegexes: RegExp[] | null = null;
|
||||
|
||||
constructor(
|
||||
readonly opt: {
|
||||
@@ -204,26 +258,33 @@ export class ContentAnonimizer {
|
||||
branchName?: string;
|
||||
repoId?: string;
|
||||
}
|
||||
) {}
|
||||
) {
|
||||
this.compiledTerms = compileTerms(opt.terms);
|
||||
if (opt.repoName && opt.branchName) {
|
||||
const r = opt.repoName;
|
||||
const b = opt.branchName;
|
||||
this.selfLinkRegexes = [
|
||||
new RegExp(`https://raw.githubusercontent.com/${r}/${b}\\b`, "gi"),
|
||||
new RegExp(`https://github.com/${r}/blob/${b}\\b`, "gi"),
|
||||
new RegExp(`https://github.com/${r}/tree/${b}\\b`, "gi"),
|
||||
new RegExp(`https://github.com/${r}`, "gi"),
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
private removeImage(content: string): string {
|
||||
if (this.opt.image !== false) {
|
||||
return content;
|
||||
}
|
||||
// remove image in markdown
|
||||
return content.replace(
|
||||
/!\[[^\]]*\]\((?<filename>.*?)(?="|\))(?<optionalpart>".*")?\)/g,
|
||||
() => {
|
||||
this.wasAnonymized = true;
|
||||
return config.ANONYMIZATION_MASK;
|
||||
}
|
||||
);
|
||||
return content.replace(markdownImageRegex, () => {
|
||||
this.wasAnonymized = true;
|
||||
return config.ANONYMIZATION_MASK;
|
||||
});
|
||||
}
|
||||
private removeLink(content: string): string {
|
||||
if (this.opt.link !== false) {
|
||||
return content;
|
||||
}
|
||||
// replace every URL in the content with the anonymization mask
|
||||
return content.replace(urlRegex, () => {
|
||||
this.wasAnonymized = true;
|
||||
return config.ANONYMIZATION_MASK;
|
||||
@@ -231,83 +292,33 @@ export class ContentAnonimizer {
|
||||
}
|
||||
|
||||
private replaceGitHubSelfLinks(content: string): string {
|
||||
if (!this.opt.repoName || !this.opt.branchName) {
|
||||
return content;
|
||||
}
|
||||
const repoName = this.opt.repoName;
|
||||
const branchName = this.opt.branchName;
|
||||
|
||||
const replaceCallback = () => {
|
||||
if (!this.selfLinkRegexes) return content;
|
||||
const replacement = `https://${config.APP_HOSTNAME}/r/${this.opt.repoId}`;
|
||||
const cb = () => {
|
||||
this.wasAnonymized = true;
|
||||
return `https://${config.APP_HOSTNAME}/r/${this.opt.repoId}`;
|
||||
return replacement;
|
||||
};
|
||||
content = content.replace(
|
||||
new RegExp(
|
||||
`https://raw.githubusercontent.com/${repoName}/${branchName}\\b`,
|
||||
"gi"
|
||||
),
|
||||
replaceCallback
|
||||
);
|
||||
content = content.replace(
|
||||
new RegExp(`https://github.com/${repoName}/blob/${branchName}\\b`, "gi"),
|
||||
replaceCallback
|
||||
);
|
||||
content = content.replace(
|
||||
new RegExp(`https://github.com/${repoName}/tree/${branchName}\\b`, "gi"),
|
||||
replaceCallback
|
||||
);
|
||||
return content.replace(
|
||||
new RegExp(`https://github.com/${repoName}`, "gi"),
|
||||
replaceCallback
|
||||
);
|
||||
for (const re of this.selfLinkRegexes) {
|
||||
content = content.replace(re, cb);
|
||||
}
|
||||
return content;
|
||||
}
|
||||
|
||||
private replaceTerms(content: string): string {
|
||||
const terms = this.opt.terms || [];
|
||||
for (let i = 0; i < terms.length; i++) {
|
||||
const spec = terms[i];
|
||||
if (spec.trim() == "") {
|
||||
continue;
|
||||
}
|
||||
// #285 — entries of the form "term=>replacement" override the default
|
||||
// XXXX-N mask so users can scrub with their preferred token (e.g.
|
||||
// "ABC", "XYZ"), keeping anonymized identifiers valid in source code.
|
||||
const parsed = parseTermSpec(spec);
|
||||
let term = parsed.term;
|
||||
const mask =
|
||||
parsed.replacement !== null
|
||||
? parsed.replacement
|
||||
: config.ANONYMIZATION_MASK + "-" + (i + 1);
|
||||
try {
|
||||
new RegExp(term, "gi");
|
||||
} catch {
|
||||
// escape regex characters
|
||||
term = term.replace(/[-[\]{}()*+?.,\\^$|#]/g, "\\$&");
|
||||
}
|
||||
|
||||
// Try the term verbatim first, then a diacritic-insensitive expansion
|
||||
// so "Davo" anonymizes "Davó" (and vice versa). See term-matching.ts.
|
||||
for (const variant of termVariants(term)) {
|
||||
const bounded = withWordBoundaries(variant.pattern, {
|
||||
sniffSource: variant.sniff,
|
||||
unicode: variant.unicode,
|
||||
});
|
||||
const flags = variant.unicode ? "giu" : "gi";
|
||||
// remove whole url if it contains the term
|
||||
content = content.replace(urlRegex, (match) => {
|
||||
if (new RegExp(bounded, flags).test(match)) {
|
||||
this.wasAnonymized = true;
|
||||
return mask;
|
||||
}
|
||||
return match;
|
||||
});
|
||||
|
||||
// remove the term in the text
|
||||
content = content.replace(new RegExp(bounded, flags), () => {
|
||||
for (const c of this.compiledTerms) {
|
||||
// remove whole url if it contains the term
|
||||
content = content.replace(urlRegex, (match) => {
|
||||
if (c.testRegex.test(match)) {
|
||||
this.wasAnonymized = true;
|
||||
return mask;
|
||||
});
|
||||
}
|
||||
return c.mask;
|
||||
}
|
||||
return match;
|
||||
});
|
||||
// remove the term in the text
|
||||
content = content.replace(c.replaceRegex, () => {
|
||||
this.wasAnonymized = true;
|
||||
return c.mask;
|
||||
});
|
||||
}
|
||||
return content;
|
||||
}
|
||||
@@ -322,24 +333,20 @@ export class ContentAnonimizer {
|
||||
}
|
||||
|
||||
export function anonymizePath(path: string, terms: string[]) {
|
||||
for (let i = 0; i < terms.length; i++) {
|
||||
const spec = terms[i];
|
||||
if (spec.trim() == "") {
|
||||
continue;
|
||||
}
|
||||
const parsed = parseTermSpec(spec);
|
||||
let term = parsed.term;
|
||||
const mask =
|
||||
parsed.replacement !== null
|
||||
? parsed.replacement
|
||||
: config.ANONYMIZATION_MASK + "-" + (i + 1);
|
||||
try {
|
||||
new RegExp(term, "gi");
|
||||
} catch {
|
||||
// escape regex characters
|
||||
term = term.replace(/[-[\]{}()*+?.,\\^$|#]/g, "\\$&");
|
||||
}
|
||||
path = path.replace(new RegExp(term, "gi"), mask);
|
||||
return anonymizePathCompiled(path, compileTerms(terms));
|
||||
}
|
||||
|
||||
// Variant that accepts pre-compiled term regexes — call sites that anonymize
|
||||
// many paths in a row (tree traversal) should compile once and reuse.
|
||||
export function anonymizePathCompiled(
|
||||
path: string,
|
||||
compiled: CompiledTermVariant[]
|
||||
) {
|
||||
for (const c of compiled) {
|
||||
path = path.replace(c.replaceRegex, c.mask);
|
||||
}
|
||||
return path;
|
||||
}
|
||||
|
||||
export { compileTerms };
|
||||
export type { CompiledTermVariant };
|
||||
|
||||
+90
-12
@@ -1,11 +1,76 @@
|
||||
import { Queue, Worker } from "bullmq";
|
||||
import config from "../config";
|
||||
import Repository from "../core/Repository";
|
||||
import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anonymizedRepositories.model";
|
||||
import { RepositoryStatus } from "../core/types";
|
||||
import * as path from "path";
|
||||
|
||||
export let cacheQueue: Queue<Repository>;
|
||||
export let removeQueue: Queue<Repository>;
|
||||
export let downloadQueue: Queue<Repository>;
|
||||
// Minimal payload for queue jobs. Workers re-fetch the Repository from the
|
||||
// database via getRepository(repoId), so passing the full Mongoose-backed
|
||||
// Repository instance through msgpackr is unnecessary — and triggers
|
||||
// ERR_BUFFER_OUT_OF_BOUNDS on long term lists / large nested fields.
|
||||
export interface RepoJobData {
|
||||
repoId: string;
|
||||
}
|
||||
|
||||
const IN_FLIGHT_STATUSES: RepositoryStatus[] = [
|
||||
RepositoryStatus.PREPARING,
|
||||
RepositoryStatus.QUEUE,
|
||||
RepositoryStatus.DOWNLOAD,
|
||||
];
|
||||
|
||||
async function markErrorIfInFlight(repoId: string, message: string) {
|
||||
try {
|
||||
await AnonymizedRepositoryModel.updateOne(
|
||||
{ repoId, status: { $in: IN_FLIGHT_STATUSES } },
|
||||
{
|
||||
$set: {
|
||||
status: RepositoryStatus.ERROR,
|
||||
statusDate: new Date(),
|
||||
statusMessage: message || "preparation_failed",
|
||||
},
|
||||
}
|
||||
).exec();
|
||||
} catch (e) {
|
||||
console.log("[QUEUE] markErrorIfInFlight error", repoId, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Recover repositories left in an in-flight status (preparing/queue/download)
|
||||
* with no live BullMQ job — typically caused by a worker process crash or
|
||||
* server restart during anonymization. Marks them as ERROR so they don't
|
||||
* appear stuck forever; the public route can re-queue them on next visit.
|
||||
*/
|
||||
export async function recoverStuckPreparing() {
|
||||
if (!downloadQueue) return;
|
||||
try {
|
||||
const stuck = await AnonymizedRepositoryModel.find(
|
||||
{ status: { $in: IN_FLIGHT_STATUSES } },
|
||||
{ repoId: 1 }
|
||||
).lean();
|
||||
for (const doc of stuck) {
|
||||
try {
|
||||
const job = await downloadQueue.getJob(doc.repoId);
|
||||
if (job) {
|
||||
const state = await job.getState();
|
||||
if (state === "active" || state === "waiting" || state === "delayed") {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
await markErrorIfInFlight(doc.repoId, "preparation_interrupted");
|
||||
console.log("[QUEUE] recovered stuck repo", doc.repoId);
|
||||
} catch (e) {
|
||||
console.log("[QUEUE] recover error for", doc.repoId, e);
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.log("[QUEUE] recoverStuckPreparing failed", e);
|
||||
}
|
||||
}
|
||||
|
||||
export let cacheQueue: Queue<RepoJobData>;
|
||||
export let removeQueue: Queue<RepoJobData>;
|
||||
export let downloadQueue: Queue<RepoJobData>;
|
||||
|
||||
// avoid loading the queue outside the main server
|
||||
export function startWorker() {
|
||||
@@ -14,28 +79,31 @@ export function startWorker() {
|
||||
port: config.REDIS_PORT,
|
||||
};
|
||||
|
||||
cacheQueue = new Queue<Repository>("cache removal", {
|
||||
cacheQueue = new Queue<RepoJobData>("cache removal", {
|
||||
connection,
|
||||
defaultJobOptions: {
|
||||
removeOnComplete: true,
|
||||
removeOnFail: true,
|
||||
},
|
||||
});
|
||||
removeQueue = new Queue<Repository>("repository removal", {
|
||||
removeQueue = new Queue<RepoJobData>("repository removal", {
|
||||
connection: {
|
||||
host: config.REDIS_HOSTNAME,
|
||||
port: config.REDIS_PORT,
|
||||
},
|
||||
defaultJobOptions: {
|
||||
removeOnComplete: true,
|
||||
removeOnFail: true,
|
||||
},
|
||||
});
|
||||
downloadQueue = new Queue<Repository>("repository download", {
|
||||
downloadQueue = new Queue<RepoJobData>("repository download", {
|
||||
connection,
|
||||
defaultJobOptions: {
|
||||
removeOnComplete: true,
|
||||
removeOnFail: true,
|
||||
},
|
||||
});
|
||||
const cacheWorker = new Worker<Repository>(
|
||||
const cacheWorker = new Worker<RepoJobData>(
|
||||
cacheQueue.name,
|
||||
path.resolve("build/queue/processes/removeCache.js"),
|
||||
{
|
||||
@@ -47,7 +115,7 @@ export function startWorker() {
|
||||
cacheWorker.on("completed", async (job) => {
|
||||
await job.remove();
|
||||
});
|
||||
const removeWorker = new Worker<Repository>(
|
||||
const removeWorker = new Worker<RepoJobData>(
|
||||
removeQueue.name,
|
||||
path.resolve("build/queue/processes/removeRepository.js"),
|
||||
{
|
||||
@@ -60,7 +128,7 @@ export function startWorker() {
|
||||
await job.remove();
|
||||
});
|
||||
|
||||
const downloadWorker = new Worker<Repository>(
|
||||
const downloadWorker = new Worker<RepoJobData>(
|
||||
downloadQueue.name,
|
||||
path.resolve("build/queue/processes/downloadRepository.js"),
|
||||
{
|
||||
@@ -77,7 +145,17 @@ export function startWorker() {
|
||||
downloadWorker.on("completed", async (job) => {
|
||||
console.log("[QUEUE] download repository completed", job.data.repoId);
|
||||
});
|
||||
downloadWorker.on("failed", async (job) => {
|
||||
console.log("download repository failed", job.data.repoId);
|
||||
downloadWorker.on("failed", async (job, err) => {
|
||||
const repoId = job?.data?.repoId;
|
||||
console.log(
|
||||
"[QUEUE] download repository failed",
|
||||
repoId,
|
||||
err?.message || err
|
||||
);
|
||||
if (!repoId) return;
|
||||
if (job && typeof job.attemptsMade === "number" && job.opts?.attempts) {
|
||||
if (job.attemptsMade < job.opts.attempts) return;
|
||||
}
|
||||
await markErrorIfInFlight(repoId, err?.message || "preparation_failed");
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
import { SandboxedJob } from "bullmq";
|
||||
import { config } from "dotenv";
|
||||
config();
|
||||
import Repository from "../../core/Repository";
|
||||
import { getRepository as getRepositoryImport } from "../../server/database";
|
||||
import { RepositoryStatus } from "../../core/types";
|
||||
import { RepoJobData } from "../index";
|
||||
|
||||
export default async function (job: SandboxedJob<Repository, void>) {
|
||||
export default async function (job: SandboxedJob<RepoJobData, void>) {
|
||||
const {
|
||||
connect,
|
||||
getRepository,
|
||||
@@ -18,29 +18,36 @@ export default async function (job: SandboxedJob<Repository, void>) {
|
||||
let statusInterval: any = null;
|
||||
await connect();
|
||||
const repo = await getRepository(job.data.repoId);
|
||||
let tickPromise: Promise<void> | null = null;
|
||||
try {
|
||||
let progress: { status: string } | null = null;
|
||||
statusInterval = setInterval(async () => {
|
||||
try {
|
||||
if (
|
||||
repo.status == RepositoryStatus.READY ||
|
||||
repo.status == RepositoryStatus.ERROR
|
||||
) {
|
||||
return clearInterval(statusInterval);
|
||||
statusInterval = setInterval(() => {
|
||||
if (tickPromise) return;
|
||||
tickPromise = (async () => {
|
||||
try {
|
||||
if (
|
||||
repo.status == RepositoryStatus.READY ||
|
||||
repo.status == RepositoryStatus.ERROR
|
||||
) {
|
||||
clearInterval(statusInterval);
|
||||
return;
|
||||
}
|
||||
if (
|
||||
progress &&
|
||||
repo.status &&
|
||||
repo.model.statusMessage !== progress?.status
|
||||
) {
|
||||
console.log(
|
||||
`[QUEUE] Progress: ${job.data.repoId} ${progress.status}`
|
||||
);
|
||||
await repo.updateStatus(repo.status, progress?.status || "");
|
||||
}
|
||||
} catch {
|
||||
// ignore error
|
||||
} finally {
|
||||
tickPromise = null;
|
||||
}
|
||||
if (
|
||||
progress &&
|
||||
repo.status &&
|
||||
repo.model.statusMessage !== progress?.status
|
||||
) {
|
||||
console.log(
|
||||
`[QUEUE] Progress: ${job.data.repoId} ${progress.status}`
|
||||
);
|
||||
await repo.updateStatus(repo.status, progress?.status || "");
|
||||
}
|
||||
} catch {
|
||||
// ignore error
|
||||
}
|
||||
})();
|
||||
}, 1000);
|
||||
function updateProgress(obj: { status: string } | string) {
|
||||
const o = typeof obj === "string" ? { status: obj } : obj;
|
||||
@@ -51,9 +58,12 @@ export default async function (job: SandboxedJob<Repository, void>) {
|
||||
await repo.resetSate(RepositoryStatus.PREPARING, "");
|
||||
await repo.anonymize(updateProgress);
|
||||
clearInterval(statusInterval);
|
||||
if (tickPromise) await tickPromise;
|
||||
await repo.updateStatus(RepositoryStatus.READY, "");
|
||||
console.log(`[QUEUE] ${job.data.repoId} is downloaded`);
|
||||
} catch (error) {
|
||||
clearInterval(statusInterval);
|
||||
if (tickPromise) await tickPromise;
|
||||
updateProgress({ status: "error" });
|
||||
if (error instanceof Error) {
|
||||
await repo.updateStatus(RepositoryStatus.ERROR, error.message);
|
||||
@@ -64,13 +74,24 @@ export default async function (job: SandboxedJob<Repository, void>) {
|
||||
}
|
||||
} catch (error: unknown) {
|
||||
clearInterval(statusInterval);
|
||||
console.log(`[QUEUE] ${job.data.repoId} is finished with an error`, error);
|
||||
setTimeout(async () => {
|
||||
// delay to avoid double saving
|
||||
if (tickPromise) {
|
||||
try {
|
||||
await repo.updateStatus(RepositoryStatus.ERROR, (error as Error).message);
|
||||
await tickPromise;
|
||||
} catch { /* ignored */ }
|
||||
}, 400);
|
||||
}
|
||||
console.log(`[QUEUE] ${job.data.repoId} is finished with an error`, error);
|
||||
try {
|
||||
await repo.updateStatus(
|
||||
RepositoryStatus.ERROR,
|
||||
error instanceof Error ? error.message : String(error)
|
||||
);
|
||||
} catch (persistError) {
|
||||
console.log(
|
||||
`[QUEUE] failed to persist ERROR status for ${job.data.repoId}`,
|
||||
persistError
|
||||
);
|
||||
}
|
||||
throw error;
|
||||
} finally {
|
||||
clearInterval(statusInterval);
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import { SandboxedJob } from "bullmq";
|
||||
import Repository from "../../core/Repository";
|
||||
import { getRepository as getRepositoryImport } from "../../server/database";
|
||||
import { RepoJobData } from "../index";
|
||||
|
||||
export default async function (job: SandboxedJob<Repository, void>) {
|
||||
export default async function (job: SandboxedJob<RepoJobData, void>) {
|
||||
const {
|
||||
connect,
|
||||
getRepository,
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import { SandboxedJob } from "bullmq";
|
||||
import Repository from "../../core/Repository";
|
||||
import { getRepository as getRepositoryImport } from "../../server/database";
|
||||
import { RepositoryStatus } from "../../core/types";
|
||||
import { RepoJobData } from "../index";
|
||||
|
||||
export default async function (job: SandboxedJob<Repository, void>) {
|
||||
export default async function (job: SandboxedJob<RepoJobData, void>) {
|
||||
const {
|
||||
connect,
|
||||
getRepository,
|
||||
|
||||
+22
-6
@@ -16,7 +16,7 @@ import { bearerTokenAuth } from "./routes/token-auth";
|
||||
import router from "./routes";
|
||||
import AnonymizedRepositoryModel from "../core/model/anonymizedRepositories/anonymizedRepositories.model";
|
||||
import { conferenceStatusCheck, repositoryStatusCheck } from "./schedule";
|
||||
import { startWorker } from "../queue";
|
||||
import { startWorker, recoverStuckPreparing } from "../queue";
|
||||
import AnonymizedPullRequestModel from "../core/model/anonymizedPullRequests/anonymizedPullRequests.model";
|
||||
import { getUser } from "./routes/route-utils";
|
||||
import config from "../config";
|
||||
@@ -165,9 +165,17 @@ export default async function start() {
|
||||
apiRouter.use("/gist", speedLimiter, router.gistPrivate);
|
||||
apiRouter.use("/anonymize-preview", speedLimiter, router.anonymizePreview);
|
||||
|
||||
// Cache message.txt presence so /api/message doesn't hit the filesystem
|
||||
// synchronously on every request. Re-checked on a 60s interval — the file
|
||||
// is admin-managed and doesn't need real-time freshness.
|
||||
const messagePath = resolve("message.txt");
|
||||
let messageExists = existsSync(messagePath);
|
||||
setInterval(() => {
|
||||
messageExists = existsSync(messagePath);
|
||||
}, 60 * 1000).unref();
|
||||
apiRouter.get("/message", async (_, res) => {
|
||||
if (existsSync("./message.txt")) {
|
||||
return res.sendFile(resolve("message.txt"));
|
||||
if (messageExists) {
|
||||
return res.sendFile(messagePath);
|
||||
}
|
||||
res.sendStatus(404);
|
||||
});
|
||||
@@ -186,10 +194,17 @@ export default async function start() {
|
||||
res.json(stat);
|
||||
return;
|
||||
}
|
||||
const [nbRepositories, users, nbPageViews, nbPullRequests] =
|
||||
const [nbRepositories, nbUsersAgg, nbPageViews, nbPullRequests] =
|
||||
await Promise.all([
|
||||
AnonymizedRepositoryModel.estimatedDocumentCount(),
|
||||
AnonymizedRepositoryModel.distinct("owner"),
|
||||
// Count distinct owners server-side instead of materializing the full
|
||||
// list of ObjectIds with `.distinct("owner")` only to take its length.
|
||||
AnonymizedRepositoryModel.collection
|
||||
.aggregate([
|
||||
{ $group: { _id: "$owner" } },
|
||||
{ $count: "n" },
|
||||
])
|
||||
.toArray(),
|
||||
AnonymizedRepositoryModel.collection
|
||||
.aggregate([
|
||||
{
|
||||
@@ -202,7 +217,7 @@ export default async function start() {
|
||||
|
||||
stat = {
|
||||
nbRepositories,
|
||||
nbUsers: users.length,
|
||||
nbUsers: (nbUsersAgg[0] as { n?: number } | undefined)?.n || 0,
|
||||
nbPageViews: nbPageViews[0]?.total || 0,
|
||||
nbPullRequests,
|
||||
};
|
||||
@@ -235,6 +250,7 @@ export default async function start() {
|
||||
repositoryStatusCheck();
|
||||
|
||||
await connect();
|
||||
await recoverStuckPreparing();
|
||||
app.listen(config.PORT);
|
||||
console.log("Database connected and Server started on port: " + config.PORT);
|
||||
}
|
||||
|
||||
+428
-107
@@ -10,6 +10,7 @@ import User from "../../core/User";
|
||||
import { ensureAuthenticated } from "./connection";
|
||||
import { handleError, getUser, isOwnerOrAdmin, getRepo } from "./route-utils";
|
||||
import adminTokensRouter from "./admin-tokens";
|
||||
import { octokit, getToken } from "../../core/GitHubUtils";
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
@@ -34,17 +35,69 @@ router.use(
|
||||
|
||||
router.use("/tokens", adminTokensRouter);
|
||||
|
||||
router.post("/queue/:name/:repo_id", async (req, res) => {
|
||||
let queue: Queue<Repository, void>;
|
||||
if (req.params.name == "download") {
|
||||
queue = downloadQueue;
|
||||
} else if (req.params.name == "cache") {
|
||||
queue = cacheQueue;
|
||||
} else if (req.params.name == "remove") {
|
||||
queue = removeQueue;
|
||||
} else {
|
||||
return res.status(404).json({ error: "queue_not_found" });
|
||||
const QUEUE_STATES = [
|
||||
"waiting",
|
||||
"active",
|
||||
"completed",
|
||||
"failed",
|
||||
"delayed",
|
||||
] as const;
|
||||
|
||||
function pickQueue(name: string): Queue | null {
|
||||
if (name === "download") return downloadQueue;
|
||||
if (name === "cache") return cacheQueue;
|
||||
if (name === "remove") return removeQueue;
|
||||
return null;
|
||||
}
|
||||
|
||||
function escapeRegex(s: string): string {
|
||||
return s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
|
||||
}
|
||||
|
||||
function parseSort(req: express.Request, fallbackField = "_id"): Record<string, 1 | -1> {
|
||||
const direction = req.query.direction === "asc" ? 1 : -1;
|
||||
const field = (req.query.sort as string) || fallbackField;
|
||||
return { [field]: direction };
|
||||
}
|
||||
|
||||
function parseDateRange(req: express.Request, field: string) {
|
||||
const range: Record<string, Date> = {};
|
||||
if (req.query.dateFrom) {
|
||||
const d = new Date(req.query.dateFrom as string);
|
||||
if (!isNaN(d.getTime())) range.$gte = d;
|
||||
}
|
||||
if (req.query.dateTo) {
|
||||
const d = new Date(req.query.dateTo as string);
|
||||
if (!isNaN(d.getTime())) range.$lte = d;
|
||||
}
|
||||
if (Object.keys(range).length === 0) return null;
|
||||
return { [field]: range };
|
||||
}
|
||||
|
||||
function csvEscape(v: unknown): string {
|
||||
if (v == null) return "";
|
||||
const s = typeof v === "object" ? JSON.stringify(v) : String(v);
|
||||
if (/[",\n\r]/.test(s)) return `"${s.replace(/"/g, '""')}"`;
|
||||
return s;
|
||||
}
|
||||
|
||||
function sendCsv(
|
||||
res: express.Response,
|
||||
filename: string,
|
||||
columns: string[],
|
||||
rows: Array<Record<string, unknown>>
|
||||
) {
|
||||
const header = columns.join(",");
|
||||
const lines = rows.map((r) => columns.map((c) => csvEscape(r[c])).join(","));
|
||||
const body = [header, ...lines].join("\n");
|
||||
res.setHeader("Content-Type", "text/csv; charset=utf-8");
|
||||
res.setHeader("Content-Disposition", `attachment; filename="${filename}"`);
|
||||
res.send(body);
|
||||
}
|
||||
|
||||
router.post("/queue/:name/:repo_id", async (req, res) => {
|
||||
const queue = pickQueue(req.params.name);
|
||||
if (!queue) return res.status(404).json({ error: "queue_not_found" });
|
||||
let job;
|
||||
try {
|
||||
job = await queue.getJob(req.params.repo_id);
|
||||
@@ -68,16 +121,8 @@ router.post("/queue/:name/:repo_id", async (req, res) => {
|
||||
});
|
||||
|
||||
router.delete("/queue/:name/:repo_id", async (req, res) => {
|
||||
let queue: Queue;
|
||||
if (req.params.name == "download") {
|
||||
queue = downloadQueue;
|
||||
} else if (req.params.name == "cache") {
|
||||
queue = cacheQueue;
|
||||
} else if (req.params.name == "remove") {
|
||||
queue = removeQueue;
|
||||
} else {
|
||||
return res.status(404).json({ error: "queue_not_found" });
|
||||
}
|
||||
const queue = pickQueue(req.params.name);
|
||||
if (!queue) return res.status(404).json({ error: "queue_not_found" });
|
||||
try {
|
||||
const job = await queue.getJob(req.params.repo_id);
|
||||
if (!job) {
|
||||
@@ -90,58 +135,153 @@ router.delete("/queue/:name/:repo_id", async (req, res) => {
|
||||
}
|
||||
});
|
||||
|
||||
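// Bulk retry: re-run every failed job in the selected queue.
// NOTE(review): POST /queue/:name/:repo_id is registered earlier and also matches this path (repo_id = "retry-failed") — confirm that handler does not shadow this route.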
|
||||
router.post("/queue/:name/retry-failed", async (req, res) => {
|
||||
const queue = pickQueue(req.params.name);
|
||||
if (!queue) return res.status(404).json({ error: "queue_not_found" });
|
||||
try {
|
||||
const failed = await queue.getJobs(["failed"]);
|
||||
let count = 0;
|
||||
for (const j of failed) {
|
||||
try {
|
||||
await j.retry();
|
||||
count++;
|
||||
} catch {
|
||||
// ignore single job failures
|
||||
}
|
||||
}
|
||||
res.json({ retried: count, total: failed.length });
|
||||
} catch (error) {
|
||||
handleError(error, res, req);
|
||||
}
|
||||
});
|
||||
|
||||
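// Bulk drain: remove all waiting and delayed jobs from the selected queue.
// NOTE(review): POST /queue/:name/:repo_id is registered earlier and also matches this path (repo_id = "drain") — confirm that handler does not shadow this route.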
|
||||
router.post("/queue/:name/drain", async (req, res) => {
|
||||
const queue = pickQueue(req.params.name);
|
||||
if (!queue) return res.status(404).json({ error: "queue_not_found" });
|
||||
try {
|
||||
await queue.drain(true);
|
||||
res.json({ ok: true });
|
||||
} catch (error) {
|
||||
handleError(error, res, req);
|
||||
}
|
||||
});
|
||||
|
||||
router.get("/queues", async (req, res) => {
|
||||
const out = await Promise.all([
|
||||
downloadQueue.getJobs([
|
||||
"waiting",
|
||||
"active",
|
||||
"completed",
|
||||
"failed",
|
||||
"delayed",
|
||||
]),
|
||||
removeQueue.getJobs([
|
||||
"waiting",
|
||||
"active",
|
||||
"completed",
|
||||
"failed",
|
||||
"delayed",
|
||||
]),
|
||||
cacheQueue.getJobs(["waiting", "active", "completed", "failed", "delayed"]),
|
||||
const search = req.query.search ? String(req.query.search).toLowerCase() : "";
|
||||
const stateFilter = req.query.state ? String(req.query.state) : null;
|
||||
const states = stateFilter && (QUEUE_STATES as readonly string[]).includes(stateFilter)
|
||||
? [stateFilter]
|
||||
: (QUEUE_STATES as readonly string[]);
|
||||
|
||||
const [download, remove, cache, dCounts, rCounts, cCounts] = await Promise.all([
|
||||
downloadQueue.getJobs(states),
|
||||
removeQueue.getJobs(states),
|
||||
cacheQueue.getJobs(states),
|
||||
downloadQueue.getJobCounts(...QUEUE_STATES),
|
||||
removeQueue.getJobCounts(...QUEUE_STATES),
|
||||
cacheQueue.getJobCounts(...QUEUE_STATES),
|
||||
]);
|
||||
|
||||
const matches = (job: { id?: string | undefined; name?: string }) => {
|
||||
if (!search) return true;
|
||||
return (
|
||||
(job.id || "").toLowerCase().includes(search) ||
|
||||
(job.name || "").toLowerCase().includes(search)
|
||||
);
|
||||
};
|
||||
|
||||
res.json({
|
||||
downloadQueue: out[0],
|
||||
removeQueue: out[1],
|
||||
cacheQueue: out[2],
|
||||
downloadQueue: download.filter(matches),
|
||||
removeQueue: remove.filter(matches),
|
||||
cacheQueue: cache.filter(matches),
|
||||
counts: {
|
||||
download: dCounts,
|
||||
remove: rCounts,
|
||||
cache: cCounts,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
// Global stats endpoint: counts by status, total disk, recent failures
|
||||
router.get("/stats", async (req, res) => {
|
||||
try {
|
||||
const [statusBreakdown, totalSize, recentErrors, totalUsers, totalConferences] =
|
||||
await Promise.all([
|
||||
AnonymizedRepositoryModel.aggregate([
|
||||
{ $group: { _id: "$status", count: { $sum: 1 }, storage: { $sum: "$size.storage" } } },
|
||||
]),
|
||||
AnonymizedRepositoryModel.aggregate([
|
||||
{ $group: { _id: null, total: { $sum: "$size.storage" } } },
|
||||
]),
|
||||
AnonymizedRepositoryModel.countDocuments({
|
||||
status: "error",
|
||||
statusDate: { $gte: new Date(Date.now() - 1000 * 60 * 60 * 24) },
|
||||
}),
|
||||
UserModel.estimatedDocumentCount(),
|
||||
ConferenceModel.estimatedDocumentCount(),
|
||||
]);
|
||||
res.json({
|
||||
statusBreakdown,
|
||||
totalStorage: totalSize[0]?.total || 0,
|
||||
recentErrors24h: recentErrors,
|
||||
totalUsers,
|
||||
totalConferences,
|
||||
});
|
||||
} catch (error) {
|
||||
handleError(error, res, req);
|
||||
}
|
||||
});
|
||||
|
||||
router.get("/repos", async (req, res) => {
|
||||
const page = parseInt(req.query.page as string) || 1;
|
||||
const limit = parseInt(req.query.limit as string) || 10;
|
||||
const limit = Math.min(parseInt(req.query.limit as string) || 10, 1000);
|
||||
const ready = req.query.ready == "true";
|
||||
const error = req.query.error == "true";
|
||||
const preparing = req.query.preparing == "true";
|
||||
const remove = req.query.removed == "true";
|
||||
const expired = req.query.expired == "true";
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
let sort: any = { _id: 1 };
|
||||
if (req.query.sort) {
|
||||
sort = {};
|
||||
sort[req.query.sort as string] = -1;
|
||||
}
|
||||
const query = [];
|
||||
const sort = parseSort(req);
|
||||
const query: Record<string, unknown>[] = [];
|
||||
|
||||
// multi-field search: repoId, source.repositoryName, statusMessage, conference
|
||||
if (req.query.search) {
|
||||
const escaped = (req.query.search as string).replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
|
||||
query.push({ repoId: { $regex: escaped } });
|
||||
const escaped = escapeRegex(req.query.search as string);
|
||||
const re = { $regex: escaped, $options: "i" };
|
||||
query.push({
|
||||
$or: [
|
||||
{ repoId: re },
|
||||
{ "source.repositoryName": re },
|
||||
{ statusMessage: re },
|
||||
{ conference: re },
|
||||
],
|
||||
});
|
||||
}
|
||||
|
||||
// filter by owner username
|
||||
if (req.query.owner) {
|
||||
const ownerUsername = req.query.owner as string;
|
||||
const ownerDoc = await UserModel.findOne({ username: ownerUsername }, { _id: 1 });
|
||||
if (!ownerDoc) {
|
||||
return res.json({ query: { $and: query }, page, total: 0, sort, results: [], statusCounts: [], totalSize: 0 });
|
||||
}
|
||||
query.push({ owner: ownerDoc._id });
|
||||
}
|
||||
|
||||
// filter by conference
|
||||
if (req.query.conference) {
|
||||
query.push({ conference: req.query.conference });
|
||||
}
|
||||
|
||||
// date range filter on anonymizeDate
|
||||
const dateFilter = parseDateRange(req, "anonymizeDate");
|
||||
if (dateFilter) query.push(dateFilter);
|
||||
|
||||
const status: { status: string }[] = [];
|
||||
if (ready) {
|
||||
status.push({ status: "ready" });
|
||||
}
|
||||
if (error) {
|
||||
status.push({ status: "error" });
|
||||
}
|
||||
if (ready) status.push({ status: "ready" });
|
||||
if (error) status.push({ status: "error" });
|
||||
if (expired) {
|
||||
status.push({ status: "expiring" });
|
||||
status.push({ status: "expired" });
|
||||
@@ -157,23 +297,59 @@ router.get("/repos", async (req, res) => {
|
||||
if (status.length > 0) {
|
||||
query.push({ $or: status });
|
||||
}
|
||||
|
||||
const filter = query.length ? { $and: query } : {};
|
||||
const skipIndex = (page - 1) * limit;
|
||||
const [total, results] = await Promise.all([
|
||||
AnonymizedRepositoryModel.find({
|
||||
$and: query,
|
||||
}).countDocuments(),
|
||||
AnonymizedRepositoryModel.find({ $and: query })
|
||||
|
||||
// CSV export branch
|
||||
if (req.query.format === "csv") {
|
||||
const all = await AnonymizedRepositoryModel.find(filter).sort(sort).limit(50000).lean();
|
||||
const rows = all.map((r) => ({
|
||||
repoId: r.repoId,
|
||||
status: r.status,
|
||||
statusMessage: r.statusMessage || "",
|
||||
anonymizeDate: r.anonymizeDate ? new Date(r.anonymizeDate).toISOString() : "",
|
||||
lastView: r.lastView ? new Date(r.lastView).toISOString() : "",
|
||||
pageView: r.pageView || 0,
|
||||
sourceRepository: r.source?.repositoryName || "",
|
||||
sourceBranch: r.source?.branch || "",
|
||||
sourceCommit: r.source?.commit || "",
|
||||
conference: r.conference || "",
|
||||
storage: r.size?.storage || 0,
|
||||
terms: (r.options?.terms || []).length,
|
||||
}));
|
||||
return sendCsv(
|
||||
res,
|
||||
`repositories-${new Date().toISOString().slice(0, 10)}.csv`,
|
||||
Object.keys(rows[0] || { repoId: 1 }),
|
||||
rows
|
||||
);
|
||||
}
|
||||
|
||||
const [total, results, statusCounts, sizeAgg] = await Promise.all([
|
||||
AnonymizedRepositoryModel.find(filter).countDocuments(),
|
||||
AnonymizedRepositoryModel.find(filter)
|
||||
.skip(skipIndex)
|
||||
.sort(sort)
|
||||
.limit(limit)
|
||||
.exec(),
|
||||
AnonymizedRepositoryModel.aggregate([
|
||||
{ $match: filter },
|
||||
{ $group: { _id: "$status", count: { $sum: 1 }, storage: { $sum: "$size.storage" } } },
|
||||
]),
|
||||
AnonymizedRepositoryModel.aggregate([
|
||||
{ $match: filter },
|
||||
{ $group: { _id: null, total: { $sum: "$size.storage" } } },
|
||||
]),
|
||||
]);
|
||||
res.json({
|
||||
query: { $and: query },
|
||||
query: filter,
|
||||
page,
|
||||
total,
|
||||
sort,
|
||||
results,
|
||||
statusCounts,
|
||||
totalSize: sizeAgg[0]?.total || 0,
|
||||
});
|
||||
});
|
||||
|
||||
@@ -184,7 +360,7 @@ router.delete(
|
||||
const repo = await getRepo(req, res, { nocheck: true });
|
||||
if (!repo) return;
|
||||
try {
|
||||
await cacheQueue.add(repo.repoId, repo, { jobId: repo.repoId });
|
||||
await cacheQueue.add(repo.repoId, { repoId: repo.repoId }, { jobId: repo.repoId });
|
||||
return res.json({ status: repo.status });
|
||||
} catch (error) {
|
||||
handleError(error, res, req);
|
||||
@@ -192,33 +368,163 @@ router.delete(
|
||||
}
|
||||
);
|
||||
|
||||
// Live GitHub info for a repository (admin diagnostic)
|
||||
router.get(
|
||||
"/repos/:repoId/github",
|
||||
async (req: express.Request, res: express.Response) => {
|
||||
try {
|
||||
const repo = await getRepo(req, res, { nocheck: true });
|
||||
if (!repo) return;
|
||||
|
||||
let token: string | undefined;
|
||||
try {
|
||||
token = await getToken(repo);
|
||||
} catch {
|
||||
token = undefined;
|
||||
}
|
||||
const oct = octokit(token || "");
|
||||
const fullName = repo.model.source?.repositoryName || "";
|
||||
const [owner, name] = fullName.split("/");
|
||||
if (!owner || !name) {
|
||||
return res.status(400).json({ error: "invalid_source_repository" });
|
||||
}
|
||||
|
||||
const out: Record<string, unknown> = {
|
||||
source: { owner, repo: name, branch: repo.model.source?.branch, commit: repo.model.source?.commit },
|
||||
};
|
||||
try {
|
||||
const info = await oct.repos.get({ owner, repo: name });
|
||||
out.repository = {
|
||||
fullName: info.data.full_name,
|
||||
private: info.data.private,
|
||||
archived: info.data.archived,
|
||||
disabled: info.data.disabled,
|
||||
defaultBranch: info.data.default_branch,
|
||||
description: info.data.description,
|
||||
stargazers: info.data.stargazers_count,
|
||||
watchers: info.data.watchers_count,
|
||||
forks: info.data.forks_count,
|
||||
openIssues: info.data.open_issues_count,
|
||||
size: info.data.size,
|
||||
language: info.data.language,
|
||||
license: info.data.license?.spdx_id,
|
||||
createdAt: info.data.created_at,
|
||||
updatedAt: info.data.updated_at,
|
||||
pushedAt: info.data.pushed_at,
|
||||
htmlUrl: info.data.html_url,
|
||||
topics: info.data.topics,
|
||||
};
|
||||
} catch (e) {
|
||||
out.repositoryError = (e as Error)?.message || String(e);
|
||||
}
|
||||
try {
|
||||
if (repo.model.source?.branch) {
|
||||
const br = await oct.repos.getBranch({ owner, repo: name, branch: repo.model.source.branch });
|
||||
out.branch = {
|
||||
name: br.data.name,
|
||||
protected: br.data.protected,
|
||||
commitSha: br.data.commit?.sha,
|
||||
};
|
||||
}
|
||||
} catch (e) {
|
||||
out.branchError = (e as Error)?.message || String(e);
|
||||
}
|
||||
try {
|
||||
if (repo.model.source?.commit) {
|
||||
const c = await oct.repos.getCommit({ owner, repo: name, ref: repo.model.source.commit });
|
||||
out.commit = {
|
||||
sha: c.data.sha,
|
||||
message: c.data.commit?.message,
|
||||
author: c.data.commit?.author,
|
||||
committer: c.data.commit?.committer,
|
||||
htmlUrl: c.data.html_url,
|
||||
stats: c.data.stats,
|
||||
filesChanged: c.data.files?.length,
|
||||
};
|
||||
}
|
||||
} catch (e) {
|
||||
out.commitError = (e as Error)?.message || String(e);
|
||||
}
|
||||
try {
|
||||
const r = await oct.rateLimit.get();
|
||||
out.rateLimit = {
|
||||
remaining: r.data.rate.remaining,
|
||||
limit: r.data.rate.limit,
|
||||
reset: new Date(r.data.rate.reset * 1000).toISOString(),
|
||||
};
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
res.json(out);
|
||||
} catch (error) {
|
||||
handleError(error, res, req);
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
router.get("/users", async (req, res) => {
|
||||
const page = parseInt(req.query.page as string) || 1;
|
||||
const limit = parseInt(req.query.limit as string) || 10;
|
||||
const limit = Math.min(parseInt(req.query.limit as string) || 10, 1000);
|
||||
const skipIndex = (page - 1) * limit;
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
let sort: any = { _id: 1 };
|
||||
if (req.query.sort) {
|
||||
sort = {};
|
||||
sort[req.query.sort as string] = -1;
|
||||
}
|
||||
let query = {};
|
||||
const sort = parseSort(req);
|
||||
const filter: Record<string, unknown> = {};
|
||||
if (req.query.search) {
|
||||
const escaped = (req.query.search as string).replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
|
||||
query = { username: { $regex: escaped } };
|
||||
const escaped = escapeRegex(req.query.search as string);
|
||||
filter.$or = [
|
||||
{ username: { $regex: escaped, $options: "i" } },
|
||||
{ "emails.email": { $regex: escaped, $options: "i" } },
|
||||
];
|
||||
}
|
||||
if (req.query.status) {
|
||||
filter.status = req.query.status;
|
||||
}
|
||||
if (req.query.role === "admin") {
|
||||
filter.isAdmin = true;
|
||||
}
|
||||
const dateFilter = parseDateRange(req, "dateOfEntry");
|
||||
if (dateFilter) Object.assign(filter, dateFilter);
|
||||
|
||||
// CSV export
|
||||
if (req.query.format === "csv") {
|
||||
const all = await UserModel.find(filter).sort(sort).limit(50000).lean();
|
||||
const rows = all.map((u) => ({
|
||||
username: u.username,
|
||||
email: u.emails?.[0]?.email || "",
|
||||
status: u.status,
|
||||
isAdmin: !!u.isAdmin,
|
||||
repoCount: (u.repositories || []).length,
|
||||
dateOfEntry: u.dateOfEntry ? new Date(u.dateOfEntry).toISOString() : "",
|
||||
}));
|
||||
return sendCsv(
|
||||
res,
|
||||
`users-${new Date().toISOString().slice(0, 10)}.csv`,
|
||||
["username", "email", "status", "isAdmin", "repoCount", "dateOfEntry"],
|
||||
rows
|
||||
);
|
||||
}
|
||||
|
||||
res.json({
|
||||
query: query,
|
||||
page,
|
||||
total: await UserModel.find(query).countDocuments(),
|
||||
sort,
|
||||
results: await UserModel.find(query)
|
||||
.sort(sort)
|
||||
.limit(limit)
|
||||
.skip(skipIndex),
|
||||
});
|
||||
const [total, results, statusCounts] = await Promise.all([
|
||||
UserModel.find(filter).countDocuments(),
|
||||
UserModel.aggregate([
|
||||
{ $match: filter },
|
||||
{ $sort: sort },
|
||||
{ $skip: skipIndex },
|
||||
{ $limit: limit },
|
||||
{
|
||||
$addFields: {
|
||||
repoCount: { $size: { $ifNull: ["$repositories", []] } },
|
||||
},
|
||||
},
|
||||
{ $project: { accessTokens: 0, apiTokens: 0 } },
|
||||
]),
|
||||
UserModel.aggregate([
|
||||
{ $match: filter },
|
||||
{ $group: { _id: "$status", count: { $sum: 1 } } },
|
||||
]),
|
||||
]);
|
||||
|
||||
res.json({ query: filter, page, total, sort, results, statusCounts });
|
||||
});
|
||||
router.get(
|
||||
"/users/:username",
|
||||
@@ -266,35 +572,50 @@ router.get(
|
||||
);
|
||||
router.get("/conferences", async (req, res) => {
|
||||
const page = parseInt(req.query.page as string) || 1;
|
||||
const limit = parseInt(req.query.limit as string) || 10;
|
||||
const limit = Math.min(parseInt(req.query.limit as string) || 10, 1000);
|
||||
const skipIndex = (page - 1) * limit;
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
let sort: any = { _id: 1 };
|
||||
if (req.query.sort) {
|
||||
sort = {};
|
||||
sort[req.query.sort as string] = -1;
|
||||
}
|
||||
let query = {};
|
||||
const sort = parseSort(req);
|
||||
const filter: Record<string, unknown> = {};
|
||||
if (req.query.search) {
|
||||
const escaped = (req.query.search as string).replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
|
||||
query = {
|
||||
$or: [
|
||||
{ name: { $regex: escaped } },
|
||||
{ conferenceID: { $regex: escaped } },
|
||||
],
|
||||
};
|
||||
const escaped = escapeRegex(req.query.search as string);
|
||||
filter.$or = [
|
||||
{ name: { $regex: escaped, $options: "i" } },
|
||||
{ conferenceID: { $regex: escaped, $options: "i" } },
|
||||
];
|
||||
}
|
||||
res.json({
|
||||
query: query,
|
||||
page,
|
||||
total: await ConferenceModel.find(query).estimatedDocumentCount(),
|
||||
sort,
|
||||
results: await ConferenceModel.find(query)
|
||||
.sort(sort)
|
||||
.limit(limit)
|
||||
.skip(skipIndex),
|
||||
});
|
||||
if (req.query.status) filter.status = req.query.status;
|
||||
const dateFilter = parseDateRange(req, "startDate");
|
||||
if (dateFilter) Object.assign(filter, dateFilter);
|
||||
|
||||
if (req.query.format === "csv") {
|
||||
const all = await ConferenceModel.find(filter).sort(sort).limit(50000).lean();
|
||||
const rows = all.map((c: Record<string, unknown>) => ({
|
||||
conferenceID: c.conferenceID,
|
||||
name: c.name,
|
||||
status: c.status,
|
||||
price: c.price || 0,
|
||||
repoCount: ((c.repositories as unknown[]) || []).length,
|
||||
startDate: c.startDate ? new Date(c.startDate as Date).toISOString() : "",
|
||||
endDate: c.endDate ? new Date(c.endDate as Date).toISOString() : "",
|
||||
}));
|
||||
return sendCsv(
|
||||
res,
|
||||
`conferences-${new Date().toISOString().slice(0, 10)}.csv`,
|
||||
["conferenceID", "name", "status", "price", "repoCount", "startDate", "endDate"],
|
||||
rows
|
||||
);
|
||||
}
|
||||
|
||||
const [total, results, statusCounts] = await Promise.all([
|
||||
ConferenceModel.find(filter).countDocuments(),
|
||||
ConferenceModel.find(filter).sort(sort).limit(limit).skip(skipIndex),
|
||||
ConferenceModel.aggregate([
|
||||
{ $match: filter },
|
||||
{ $group: { _id: "$status", count: { $sum: 1 } } },
|
||||
]),
|
||||
]);
|
||||
res.json({ query: filter, page, total, sort, results, statusCounts });
|
||||
});
|
||||
|
||||
export default router;
|
||||
|
||||
@@ -175,7 +175,7 @@ router.delete(
|
||||
const user = await getUser(req);
|
||||
isOwnerOrAdmin([repo.owner.id], user);
|
||||
await repo.updateStatus(RepositoryStatus.REMOVING);
|
||||
await removeQueue.add(repo.repoId, repo, { jobId: repo.repoId });
|
||||
await removeQueue.add(repo.repoId, { repoId: repo.repoId }, { jobId: repo.repoId });
|
||||
return res.json({ status: repo.status });
|
||||
} catch (error) {
|
||||
handleError(error, res, req);
|
||||
@@ -470,7 +470,7 @@ router.post(
|
||||
repo.model.conference = repoUpdate.conference;
|
||||
await repo.updateStatus(RepositoryStatus.PREPARING);
|
||||
res.json({ status: repo.status });
|
||||
await downloadQueue.add(repo.repoId, repo, { jobId: repo.repoId });
|
||||
await downloadQueue.add(repo.repoId, { repoId: repo.repoId }, { jobId: repo.repoId });
|
||||
} catch (error) {
|
||||
return handleError(error, res, req);
|
||||
}
|
||||
@@ -559,7 +559,7 @@ router.post("/", async (req: express.Request, res: express.Response) => {
|
||||
}
|
||||
|
||||
res.send({ status: repo.status });
|
||||
downloadQueue.add(repo.repoId, new Repository(repo), {
|
||||
downloadQueue.add(repo.repoId, { repoId: repo.repoId }, {
|
||||
jobId: repo.repoId,
|
||||
attempts: 3,
|
||||
});
|
||||
|
||||
@@ -178,7 +178,7 @@ router.get(
|
||||
// && repo.status != "preparing"
|
||||
) {
|
||||
await repo.updateStatus(RepositoryStatus.PREPARING);
|
||||
await downloadQueue.add(repo.repoId, repo, {
|
||||
await downloadQueue.add(repo.repoId, { repoId: repo.repoId }, {
|
||||
jobId: repo.repoId,
|
||||
attempts: 3,
|
||||
});
|
||||
|
||||
@@ -61,6 +61,7 @@ router.get("/quota", async (req: express.Request, res: express.Response) => {
|
||||
}
|
||||
|
||||
if (uncachedIds.length) {
|
||||
const uncachedSet = new Set(uncachedIds);
|
||||
const agg = await FileModel.aggregate([
|
||||
{ $match: { repoId: { $in: uncachedIds } } },
|
||||
{
|
||||
@@ -76,7 +77,7 @@ router.get("/quota", async (req: express.Request, res: express.Response) => {
|
||||
byId.set(row._id, { storage: row.storage || 0, file: row.file || 0 });
|
||||
}
|
||||
for (const r of ready) {
|
||||
if (!uncachedIds.includes(r.repoId)) continue;
|
||||
if (!uncachedSet.has(r.repoId)) continue;
|
||||
const size = byId.get(r.repoId) || { storage: 0, file: 0 };
|
||||
totalStorage += size.storage;
|
||||
totalFiles += size.file;
|
||||
@@ -85,7 +86,7 @@ router.get("/quota", async (req: express.Request, res: express.Response) => {
|
||||
if (isConnected) {
|
||||
await Promise.all(
|
||||
ready
|
||||
.filter((r) => uncachedIds.includes(r.repoId))
|
||||
.filter((r) => uncachedSet.has(r.repoId))
|
||||
.map((r) => r.model.save())
|
||||
);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user