mirror of
https://github.com/tdurieux/anonymous_github.git
synced 2026-05-16 06:49:09 +02:00
feat: per-term anonymization output via term=>replacement
A term entered as "Anonymous=>ABC" now scrubs "Anonymous" to "ABC" instead of "XXXX-N". Lets users keep anonymized identifiers valid in source code (no hyphen) and align tokens between paper text and repo. Indexing for default-mask terms is unchanged: a list of "Alpha=>AAA", "Beta" still produces XXXX-2 for Beta. Fixes #285.
This commit is contained in:
+25
-10
@@ -4,7 +4,11 @@ import { StringDecoder } from "string_decoder";
|
||||
import { isText } from "istextorbinary";
|
||||
|
||||
import config from "../config";
|
||||
import { termVariants, withWordBoundaries } from "./term-matching";
|
||||
import {
|
||||
parseTermSpec,
|
||||
termVariants,
|
||||
withWordBoundaries,
|
||||
} from "./term-matching";
|
||||
|
||||
const urlRegex =
|
||||
/<?\b((https?|ftp|file):\/\/)[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]\b\/?>?/g;
|
||||
@@ -189,11 +193,19 @@ export class ContentAnonimizer {
|
||||
private replaceTerms(content: string): string {
|
||||
const terms = this.opt.terms || [];
|
||||
for (let i = 0; i < terms.length; i++) {
|
||||
let term = terms[i];
|
||||
if (term.trim() == "") {
|
||||
const spec = terms[i];
|
||||
if (spec.trim() == "") {
|
||||
continue;
|
||||
}
|
||||
const mask = config.ANONYMIZATION_MASK + "-" + (i + 1);
|
||||
// #285 — entries of the form "term=>replacement" override the default
|
||||
// XXXX-N mask so users can scrub with their preferred token (e.g.
|
||||
// "ABC", "XYZ"), keeping anonymized identifiers valid in source code.
|
||||
const parsed = parseTermSpec(spec);
|
||||
let term = parsed.term;
|
||||
const mask =
|
||||
parsed.replacement !== null
|
||||
? parsed.replacement
|
||||
: config.ANONYMIZATION_MASK + "-" + (i + 1);
|
||||
try {
|
||||
new RegExp(term, "gi");
|
||||
} catch {
|
||||
@@ -239,20 +251,23 @@ export class ContentAnonimizer {
|
||||
|
||||
/**
 * Replaces every case-insensitive occurrence of each term in `path` with that
 * term's mask.
 *
 * Each entry of `terms` is parsed with `parseTermSpec`. An entry of the form
 * "term=>replacement" is masked with its own replacement; any other entry is
 * masked with `config.ANONYMIZATION_MASK + "-" + N`, where N is the entry's
 * 1-based position in `terms`. Blank entries are skipped but still occupy a
 * position, so the numbering of the remaining entries does not shift.
 *
 * @param path Path to anonymize.
 * @param terms Term entries, applied in list order.
 * @returns The path with all matches replaced.
 */
export function anonymizePath(path: string, terms: string[]): string {
  for (let i = 0; i < terms.length; i++) {
    const spec = terms[i];
    if (spec.trim() == "") {
      continue;
    }
    const parsed = parseTermSpec(spec);
    const mask =
      parsed.replacement !== null
        ? parsed.replacement
        : config.ANONYMIZATION_MASK + "-" + (i + 1);
    // A term is used as a regular expression when it compiles as one;
    // otherwise its special characters are escaped and it is matched literally.
    let pattern: RegExp;
    try {
      pattern = new RegExp(parsed.term, "gi");
    } catch {
      // escape regex characters
      pattern = new RegExp(
        parsed.term.replace(/[-[\]{}()*+?.,\\^$|#]/g, "\\$&"),
        "gi"
      );
    }
    // Use a replacer function rather than a replacement string: a string
    // would have "$&", "$1", "$$", ... expanded by String.prototype.replace,
    // so a user-chosen replacement containing "$" would not be inserted
    // verbatim.
    path = path.replace(pattern, () => mask);
  }
  return path;
}
|
||||
|
||||
@@ -102,3 +102,27 @@ export function termVariants(escapedTerm: string): {
|
||||
{ pattern: diacriticInsensitive(stripped), sniff: stripped, unicode: true },
|
||||
];
|
||||
}
|
||||
|
||||
/**
 * Splits a term entry into the text to search for and an optional custom
 * replacement.
 *
 * A term can override the default `XXXX-N` mask via the syntax
 * `<term>=><replacement>`, e.g. "Anonymous=>ABC" yields term "Anonymous" and
 * replacement "ABC". Only the first `=>` acts as the separator. Whitespace
 * directly around `=>` is dropped; whitespace at the start of the term and at
 * the end of the replacement is kept as entered.
 *
 * @param spec Raw term entry.
 * @returns `term` is the text to match. `replacement` is the custom mask, or
 *   `null` when the entry has no separator or nothing before it; in that
 *   case `term` is the unmodified `spec` and the caller is expected to fall
 *   back to the default mask. An entry with an empty right-hand side
 *   ("Foo=>") returns the empty string as replacement, not `null`.
 */
export function parseTermSpec(spec: string): {
  term: string;
  replacement: string | null;
} {
  const separator = "=>";
  const idx = spec.indexOf(separator);
  if (idx < 0) {
    return { term: spec, replacement: null };
  }
  // trimEnd()/trimStart() strip the same characters as /\s+$/ and /^\s+/, but
  // in linear time: the regex /\s+$/ backtracks quadratically on a long run
  // of whitespace that is followed by a non-space character.
  const term = spec.slice(0, idx).trimEnd();
  const replacement = spec.slice(idx + separator.length).trimStart();
  if (!term) {
    return { term: spec, replacement: null };
  }
  return { term, replacement };
}
|
||||
|
||||
Reference in New Issue
Block a user