From 49b124e188913679dfcc293236afd851cc96e7f0 Mon Sep 17 00:00:00 2001
From: tdurieux
Date: Tue, 5 May 2026 00:07:49 +0300
Subject: [PATCH] fix regex

---
 src/core/term-matching.ts | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/core/term-matching.ts b/src/core/term-matching.ts
index 07572eb..fafe7d3 100644
--- a/src/core/term-matching.ts
+++ b/src/core/term-matching.ts
@@ -71,14 +71,28 @@ export const DIACRITIC_CLASSES: Record = {
 // diacritic-insensitive way. ASCII letters are replaced with a character
 // class that includes their accented siblings; other chars are left alone so
 // regex metacharacters and escape sequences keep working.
+//
+// Output is consumed with the `u` flag, where identity escapes of arbitrary
+// characters (e.g. `\-`) are syntax errors. Strip the backslash from any
+// escape that isn't valid under `u` so user input like "170cm\-56kg" still
+// compiles.
+const UNICODE_ESCAPE_CHARS = new Set(
+  "^$\\.*+?()[]{}|/dDsSwWbBnrtvf0cxupP".split("")
+);
 export function diacriticInsensitive(escapedTerm: string): string {
   let out = "";
   let i = 0;
   while (i < escapedTerm.length) {
     const c = escapedTerm[i];
-    // Pass through backslash escapes verbatim (e.g. "\." or "\d").
     if (c === "\\" && i + 1 < escapedTerm.length) {
-      out += c + escapedTerm[i + 1];
+      const next = escapedTerm[i + 1];
+      // Drop the backslash for invalid unicode-mode identity escapes; the
+      // character itself is preserved literally.
+      if (UNICODE_ESCAPE_CHARS.has(next)) {
+        out += c + next;
+      } else {
+        out += next;
+      }
       i += 2;
       continue;
     }