diff --git a/public/partials/anonymize.htm b/public/partials/anonymize.htm
index 57660cf..8497b69 100644
--- a/public/partials/anonymize.htm
+++ b/public/partials/anonymize.htm
@@ -148,7 +148,7 @@
diff --git a/src/core/anonymize-utils.ts b/src/core/anonymize-utils.ts
index 155f501..7e7ddb5 100644
--- a/src/core/anonymize-utils.ts
+++ b/src/core/anonymize-utils.ts
@@ -4,7 +4,11 @@ import { StringDecoder } from "string_decoder";
import { isText } from "istextorbinary";
import config from "../config";
-import { termVariants, withWordBoundaries } from "./term-matching";
+import {
+ parseTermSpec,
+ termVariants,
+ withWordBoundaries,
+} from "./term-matching";
const urlRegex =
/\b((https?|ftp|file):\/\/)[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]\b\/?>?/g;
@@ -189,11 +193,19 @@ export class ContentAnonimizer {
private replaceTerms(content: string): string {
const terms = this.opt.terms || [];
for (let i = 0; i < terms.length; i++) {
- let term = terms[i];
- if (term.trim() == "") {
+ const spec = terms[i];
+ if (spec.trim() == "") {
continue;
}
- const mask = config.ANONYMIZATION_MASK + "-" + (i + 1);
+ // #285 — entries of the form "term=>replacement" override the default
+ // XXXX-N mask so users can scrub with their preferred token (e.g.
+ // "ABC", "XYZ"), keeping anonymized identifiers valid in source code.
+ const parsed = parseTermSpec(spec);
+ let term = parsed.term;
+ const mask =
+ parsed.replacement !== null
+ ? parsed.replacement
+ : config.ANONYMIZATION_MASK + "-" + (i + 1);
try {
new RegExp(term, "gi");
} catch {
@@ -239,20 +251,23 @@ export class ContentAnonimizer {
export function anonymizePath(path: string, terms: string[]) {
for (let i = 0; i < terms.length; i++) {
- let term = terms[i];
- if (term.trim() == "") {
+ const spec = terms[i];
+ if (spec.trim() == "") {
continue;
}
+ const parsed = parseTermSpec(spec); // split an optional "term=>replacement" entry
+ let term = parsed.term;
+ const mask =
+ parsed.replacement !== null
+ ? parsed.replacement
+ : config.ANONYMIZATION_MASK + "-" + (i + 1); // default mask is numbered by the entry's 1-based position
try {
new RegExp(term, "gi");
} catch {
// escape regex characters
term = term.replace(/[-[\]{}()*+?.,\\^$|#]/g, "\\$&");
}
- path = path.replace(
- new RegExp(term, "gi"),
- config.ANONYMIZATION_MASK + "-" + (i + 1)
- );
+ path = path.replace(new RegExp(term, "gi"), () => mask); // function replacer: "$&", "$$", "$1" in a user-supplied mask stay literal instead of re-inserting the match
}
return path;
}
diff --git a/src/core/term-matching.ts b/src/core/term-matching.ts
index ec6565d..07572eb 100644
--- a/src/core/term-matching.ts
+++ b/src/core/term-matching.ts
@@ -102,3 +102,27 @@ export function termVariants(escapedTerm: string): {
{ pattern: diacriticInsensitive(stripped), sniff: stripped, unicode: true },
];
}
+
+// A term can override the default `XXXX-N` mask via the syntax
+//   term=>replacement
+// e.g. "Anonymous=>ABC" replaces "Anonymous" with "ABC". Whitespace around
+// `=>` is allowed. The replacement is inserted verbatim, so users can pick
+// strings without the hyphen that breaks identifiers. An empty right-hand
+// side ("Foo=>") yields an empty replacement, i.e. the term is deleted.
+// If the entry has no separator, or nothing before `=>`, the whole entry is
+// returned as the term and the caller falls back to the default mask.
+export function parseTermSpec(spec: string): {
+ term: string; // text left of the first "=>", or the whole spec when no override applies
+ replacement: string | null; // null signals "no override"; "" is a valid (empty) override
+} {
+ const idx = spec.indexOf("=>"); // only the first "=>" acts as the separator
+ if (idx < 0) {
+ return { term: spec, replacement: null };
+ }
+ const term = spec.slice(0, idx).replace(/\s+$/, ""); // drop whitespace directly before "=>"
+ const replacement = spec.slice(idx + 2).replace(/^\s+/, ""); // drop whitespace directly after "=>"
+ if (!term) {
+ return { term: spec, replacement: null }; // nothing before "=>": treat the whole entry as the term
+ }
+ return { term, replacement };
+}
diff --git a/test/anonymize-utils.test.js b/test/anonymize-utils.test.js
index 5ab5fed..550e4e1 100644
--- a/test/anonymize-utils.test.js
+++ b/test/anonymize-utils.test.js
@@ -5,6 +5,7 @@ require("ts-node/register/transpile-only");
const {
withWordBoundaries,
termVariants,
+ parseTermSpec,
} = require("../src/core/term-matching");
/**
@@ -97,11 +98,16 @@ class ContentAnonimizer {
replaceTerms(content) {
const terms = this.opt.terms || [];
for (let i = 0; i < terms.length; i++) {
- let term = terms[i];
- if (term.trim() == "") {
+ const spec = terms[i];
+ if (spec.trim() == "") {
continue;
}
- const mask = ANONYMIZATION_MASK + "-" + (i + 1);
+ const parsed = parseTermSpec(spec);
+ let term = parsed.term;
+ const mask =
+ parsed.replacement !== null
+ ? parsed.replacement
+ : ANONYMIZATION_MASK + "-" + (i + 1);
try {
new RegExp(term, "gi");
} catch {
@@ -140,19 +146,22 @@ class ContentAnonimizer {
function anonymizePath(path, terms) {
for (let i = 0; i < terms.length; i++) {
- let term = terms[i];
- if (term.trim() == "") {
+ const spec = terms[i];
+ if (spec.trim() == "") {
continue;
}
+ const parsed = parseTermSpec(spec); // split an optional "term=>replacement" entry
+ let term = parsed.term;
+ const mask =
+ parsed.replacement !== null
+ ? parsed.replacement
+ : ANONYMIZATION_MASK + "-" + (i + 1); // default mask is numbered by the entry's 1-based position
try {
new RegExp(term, "gi");
} catch {
term = term.replace(/[-[\]{}()*+?.,\\^$|#]/g, "\\$&");
}
- path = path.replace(
- new RegExp(term, "gi"),
- ANONYMIZATION_MASK + "-" + (i + 1)
- );
+ path = path.replace(new RegExp(term, "gi"), () => mask); // function replacer: "$&", "$$", "$1" in a user-supplied mask stay literal instead of re-inserting the match
}
return path;
}
@@ -236,6 +245,30 @@ describe("ContentAnonimizer", function () {
expect(result).to.include("XXXX-1");
});
+ // #285 — `term=>replacement` uses the user-supplied replacement
+ // instead of XXXX-N, so anonymized identifiers can stay valid in code.
+ it("uses a custom replacement when the term is 'term=>replacement'", function () {
+ const a = new ContentAnonimizer({ terms: ["Anonymous=>ABC"] });
+ const result = a.anonymize("class Anonymous extends Base {}");
+ expect(result).to.equal("class ABC extends Base {}");
+ });
+
+ it("supports custom and default-mask terms together with stable indices", function () {
+ const a = new ContentAnonimizer({
+ terms: ["Alpha=>AAA", "Beta"],
+ });
+ const result = a.anonymize("Alpha and Beta");
+ // Beta uses XXXX-2 (its 1-based index in the list), even though
+ // Alpha had a custom replacement.
+ expect(result).to.equal("AAA and XXXX-2");
+ });
+
+ it("replaces the term with an empty string when the replacement side is empty", function () {
+ const a = new ContentAnonimizer({ terms: ["Foo=>"] }); // parses to replacement "" (not null), so no XXXX-N mask is used
+ const result = a.anonymize("Foo bar");
+ expect(result).to.equal(" bar");
+ });
+
// #280 — accented terms should match both the accented and unaccented
// variants so "Davó" scrubs "Davo" (and vice versa).
it("matches accented and unaccented variants of the same term", function () {