diff --git a/src/core/anonymize-utils.ts b/src/core/anonymize-utils.ts index 7a17e01..efc3065 100644 --- a/src/core/anonymize-utils.ts +++ b/src/core/anonymize-utils.ts @@ -292,11 +292,17 @@ function compileTerms(terms: string[] | undefined): CompiledTermVariant[] { unicode: variant.unicode, }); const baseFlags = variant.unicode ? "iu" : "i"; - compiled.push({ - replaceRegex: new RegExp(bounded, "g" + baseFlags), - testRegex: new RegExp(bounded, baseFlags), - mask, - }); + // A user-supplied regex can be valid without `u` but illegal with it + // (e.g. `[\w-\.]` — a range between class shorthands is rejected only + // in unicode mode). Skip variants that fail to compile so the other + // variant still anonymizes. + try { + const replaceRegex = new RegExp(bounded, "g" + baseFlags); + const testRegex = new RegExp(bounded, baseFlags); + compiled.push({ replaceRegex, testRegex, mask }); + } catch { + continue; + } } } return compiled; diff --git a/src/server/routes/route-utils.ts b/src/server/routes/route-utils.ts index fa4a54c..06e3a79 100644 --- a/src/server/routes/route-utils.ts +++ b/src/server/routes/route-utils.ts @@ -114,12 +114,42 @@ export function isOwnerCoauthorOrAdmin(repo: Repository, user: User) { }); } +// Pull the first project-relevant frame ("file:line:col") out of a stack so +// background-job errors (no req.originalUrl) still get a debug pointer in the +// `url` slot. Skips node internals and node_modules. 
/**
 * Extracts the first project-relevant frame location from a stack trace.
 *
 * Each line of the stack is checked for a trailing `file:line:col` location,
 * either wrapped in parentheses (`at fn (file:line:col)`) or bare
 * (`at file:line:col`). The first location that neither starts with `node:`
 * nor contains `node_modules` is returned.
 *
 * @param stack - Candidate stack trace; any non-string value yields `undefined`.
 * @returns The `file:line:col` text of the first qualifying frame, or
 *   `undefined` when no frame qualifies.
 */
function originFromStack(stack: unknown): string | undefined {
  if (typeof stack !== "string") return undefined;
  for (const frame of stack.split("\n")) {
    // Parenthesised form first, then the bare "at file:line:col" form.
    const match =
      frame.match(/\(([^()\s]+:\d+:\d+)\)\s*$/) ||
      frame.match(/at\s+([^()\s]+:\d+:\d+)\s*$/);
    const loc = match?.[1];
    if (!loc) continue;
    // Runtime internals and dependencies are not useful debug pointers.
    if (loc.startsWith("node:") || loc.includes("node_modules")) continue;
    return loc;
  }
  return undefined;
}

/**
 * Fills in `payload.url` (mutating `payload` in place) when a value can be found.
 *
 * Precedence:
 *  1. `req.originalUrl`, when truthy — overwrites any existing `payload.url`.
 *  2. An existing non-empty string `payload.url` — left untouched.
 *  3. The first project frame found in `payload.stack` via `originFromStack`.
 * When none of these applies, `payload` is left unchanged.
 *
 * @param payload - Serialized error data that will be handed to the logger.
 * @param req - Optional request whose `originalUrl` takes priority.
 */
function ensureUrl(
  payload: Record<string, unknown>,
  req?: express.Request
): void {
  if (req?.originalUrl) {
    payload.url = req.originalUrl;
    return;
  }
  if (typeof payload.url === "string" && payload.url) return;
  const origin = originFromStack(payload.stack);
  if (origin) payload.url = origin;
}

// eslint-disable-next-line @typescript-eslint/no-explicit-any function printError(error: any, req?: express.Request) { if (error instanceof AnonymousError) { if (req?.originalUrl === "/api/repo/undefined/options") return; const payload: Record = serializeError(error); - if (req?.originalUrl) payload.url = req.originalUrl; + ensureUrl(payload, req); // Use the error's snake_case message as the logger summary so the admin // Errors page surfaces something meaningful (e.g. "repoId_already_used") // instead of a generic "anonymous error" wrapper. 
@@ -135,7 +165,7 @@ function printError(error: any, req?: express.Request) { } } else if (error instanceof HTTPError) { const payload: Record = serializeError(error); - if (req?.originalUrl) payload.url = req.originalUrl; + ensureUrl(payload, req); logger.error(error.code || error.name || "HTTPError", payload); } else { // Unhandled errors: use the error class name (SyntaxError, TypeError, @@ -148,7 +178,7 @@ function printError(error: any, req?: express.Request) { ) { serialized.httpStatus = 500; } - if (req?.originalUrl) serialized.url = req.originalUrl; + ensureUrl(serialized, req); const summary = (error && typeof error === "object" && ((error as { name?: string }).name || diff --git a/test/anonymize-utils.test.js b/test/anonymize-utils.test.js index 469cd69..e195382 100644 --- a/test/anonymize-utils.test.js +++ b/test/anonymize-utils.test.js @@ -119,6 +119,12 @@ class ContentAnonimizer { unicode: variant.unicode, }); const flags = variant.unicode ? "giu" : "gi"; + let regex; + try { + regex = new RegExp(bounded, flags); + } catch { + continue; + } content = content.replace(urlRegex, (match) => { if (new RegExp(bounded, flags).test(match)) { this.wasAnonymized = true; @@ -126,7 +132,7 @@ class ContentAnonimizer { } return match; }); - content = content.replace(new RegExp(bounded, flags), () => { + content = content.replace(regex, () => { this.wasAnonymized = true; return mask; }); @@ -217,6 +223,21 @@ describe("ContentAnonimizer", function () { expect(() => anon.anonymize("some foo(bar here")).to.not.throw(); }); + // A user regex valid without `u` but illegal with it (range between + // class shorthands like `[\w-\.]`) must not crash compilation; the + // non-unicode variant should still anonymize matches. 
+ it("accepts a regex that only compiles without the unicode flag", function () { + const anon = new ContentAnonimizer({ + terms: ["[\\w-\\.]+@([\\w-]+\\.)+[\\w-]{2,4}"], + }); + let result; + expect(() => { + result = anon.anonymize("contact me at alice@example.com please"); + }).to.not.throw(); + expect(result).to.not.include("alice@example.com"); + expect(result).to.include("XXXX-1"); + }); + // #175 — terms starting with a non-word char (e.g. "@username") were // silently skipped because \b can't match between two non-word chars. it("replaces terms starting with a non-word character (e.g. @user)", function () {