diff --git a/public/partials/anonymize.htm b/public/partials/anonymize.htm index 57660cf..8497b69 100644 --- a/public/partials/anonymize.htm +++ b/public/partials/anonymize.htm @@ -148,7 +148,7 @@
- One term per line (regex allowed). Replaced by {{site_options.ANONYMIZATION_MASK}}-[N]. + One term per line (regex allowed). Replaced by {{site_options.ANONYMIZATION_MASK}}-[N], or use term=>replacement to pick your own (e.g. Anonymous=>ABC).
Regex characters detected. Escape them if unintentional.
Terms are in an invalid format.
diff --git a/src/core/anonymize-utils.ts b/src/core/anonymize-utils.ts index 155f501..7e7ddb5 100644 --- a/src/core/anonymize-utils.ts +++ b/src/core/anonymize-utils.ts @@ -4,7 +4,11 @@ import { StringDecoder } from "string_decoder"; import { isText } from "istextorbinary"; import config from "../config"; -import { termVariants, withWordBoundaries } from "./term-matching"; +import { + parseTermSpec, + termVariants, + withWordBoundaries, +} from "./term-matching"; const urlRegex = /?/g; @@ -189,11 +193,19 @@ export class ContentAnonimizer { private replaceTerms(content: string): string { const terms = this.opt.terms || []; for (let i = 0; i < terms.length; i++) { - let term = terms[i]; - if (term.trim() == "") { + const spec = terms[i]; + if (spec.trim() == "") { continue; } - const mask = config.ANONYMIZATION_MASK + "-" + (i + 1); + // #285 — entries of the form "term=>replacement" override the default + // XXXX-N mask so users can scrub with their preferred token (e.g. + // "ABC", "XYZ"), keeping anonymized identifiers valid in source code. + const parsed = parseTermSpec(spec); + let term = parsed.term; + const mask = + parsed.replacement !== null + ? parsed.replacement + : config.ANONYMIZATION_MASK + "-" + (i + 1); try { new RegExp(term, "gi"); } catch { @@ -239,20 +251,23 @@ export class ContentAnonimizer { export function anonymizePath(path: string, terms: string[]) { for (let i = 0; i < terms.length; i++) { - let term = terms[i]; - if (term.trim() == "") { + const spec = terms[i]; + if (spec.trim() == "") { continue; } + const parsed = parseTermSpec(spec); + let term = parsed.term; + const mask = + parsed.replacement !== null + ? 
parsed.replacement + : config.ANONYMIZATION_MASK + "-" + (i + 1); try { new RegExp(term, "gi"); } catch { // escape regex characters term = term.replace(/[-[\]{}()*+?.,\\^$|#]/g, "\\$&"); } - path = path.replace( - new RegExp(term, "gi"), - config.ANONYMIZATION_MASK + "-" + (i + 1) - ); + path = path.replace(new RegExp(term, "gi"), mask); } return path; } diff --git a/src/core/term-matching.ts b/src/core/term-matching.ts index ec6565d..07572eb 100644 --- a/src/core/term-matching.ts +++ b/src/core/term-matching.ts @@ -102,3 +102,27 @@ export function termVariants(escapedTerm: string): { { pattern: diacriticInsensitive(stripped), sniff: stripped, unicode: true }, ]; } + +// A term can override the default `XXXX-N` mask via the syntax +// term=>replacement +// e.g. "Anonymous=>ABC" replaces "Anonymous" with "ABC". Whitespace around +// `=>` is allowed, so users can pick strings without the hyphen that breaks +// identifiers. An empty replacement ("Foo=>") is kept as "" and deletes the term. NOTE: anonymizePath passes the replacement to String.prototype.replace as a replacement string, so "$&", "$1" and "$$" are expanded rather than inserted verbatim. +// +// If the entry is just `=>` with no LHS, or has no separator, the original +// term is returned and the caller falls back to the default mask.
+export function parseTermSpec(spec: string): { + term: string; + replacement: string | null; +} { + const idx = spec.indexOf("=>"); + if (idx < 0) { + return { term: spec, replacement: null }; + } + const term = spec.slice(0, idx).replace(/\s+$/, ""); + const replacement = spec.slice(idx + 2).replace(/^\s+/, ""); + if (!term) { + return { term: spec, replacement: null }; + } + return { term, replacement }; +} diff --git a/test/anonymize-utils.test.js b/test/anonymize-utils.test.js index 5ab5fed..550e4e1 100644 --- a/test/anonymize-utils.test.js +++ b/test/anonymize-utils.test.js @@ -5,6 +5,7 @@ require("ts-node/register/transpile-only"); const { withWordBoundaries, termVariants, + parseTermSpec, } = require("../src/core/term-matching"); /** @@ -97,11 +98,16 @@ class ContentAnonimizer { replaceTerms(content) { const terms = this.opt.terms || []; for (let i = 0; i < terms.length; i++) { - let term = terms[i]; - if (term.trim() == "") { + const spec = terms[i]; + if (spec.trim() == "") { continue; } - const mask = ANONYMIZATION_MASK + "-" + (i + 1); + const parsed = parseTermSpec(spec); + let term = parsed.term; + const mask = + parsed.replacement !== null + ? parsed.replacement + : ANONYMIZATION_MASK + "-" + (i + 1); try { new RegExp(term, "gi"); } catch { @@ -140,19 +146,22 @@ class ContentAnonimizer { function anonymizePath(path, terms) { for (let i = 0; i < terms.length; i++) { - let term = terms[i]; - if (term.trim() == "") { + const spec = terms[i]; + if (spec.trim() == "") { continue; } + const parsed = parseTermSpec(spec); + let term = parsed.term; + const mask = + parsed.replacement !== null + ? 
parsed.replacement + : ANONYMIZATION_MASK + "-" + (i + 1); try { new RegExp(term, "gi"); } catch { term = term.replace(/[-[\]{}()*+?.,\\^$|#]/g, "\\$&"); } - path = path.replace( - new RegExp(term, "gi"), - ANONYMIZATION_MASK + "-" + (i + 1) - ); + path = path.replace(new RegExp(term, "gi"), mask); } return path; } @@ -236,6 +245,30 @@ describe("ContentAnonimizer", function () { expect(result).to.include("XXXX-1"); }); + // #285 — `term=>replacement` uses the user-supplied replacement + // instead of XXXX-N, so anonymized identifiers can stay valid in code. + it("uses a custom replacement when the term is 'term=>replacement'", function () { + const a = new ContentAnonimizer({ terms: ["Anonymous=>ABC"] }); + const result = a.anonymize("class Anonymous extends Base {}"); + expect(result).to.equal("class ABC extends Base {}"); + }); + + it("supports custom and default-mask terms together with stable indices", function () { + const a = new ContentAnonimizer({ + terms: ["Alpha=>AAA", "Beta"], + }); + const result = a.anonymize("Alpha and Beta"); + // Beta uses XXXX-2 (its 1-based index in the list), even though + // Alpha had a custom replacement. + expect(result).to.equal("AAA and XXXX-2"); + }); + + it("falls back to the default mask when the entry has no replacement", function () { + const a = new ContentAnonimizer({ terms: ["Foo=>"] }); + const result = a.anonymize("Foo bar"); + expect(result).to.equal(" bar"); + }); + // #280 — accented terms should match both the accented and unaccented // variants so "Davó" scrubs "Davo" (and vice versa). it("matches accented and unaccented variants of the same term", function () {