Revise name/branch splitting code

This now breaks the name into fragments and reruns the fragments against the NSi matcher rather than using flaky regular expressions. Has a few advantages: (re: https://github.com/osmlab/name-suggestion-index/issues/4543#issuecomment-797577999)
2026-02-13 01:02:58 +00:00 · 2021-03-12 15:03:50 -05:00
parent 3cf5f69082
commit f5b6024988
1 changed files with 20 additions and 16 deletions
--- a/modules/services/nsi.js
+++ b/modules/services/nsi.js
@@ -458,6 +458,10 @@ function _upgradeTags(tags, loc) {

    // At this point we have matched a canonical item and can suggest tag upgrades..
    const tkv = item.tkv;
+    const parts = tkv.split('/', 3);     // tkv = "tree/key/value"
+    const t = parts[0];
+    const k = parts[1];
+    const v = parts[2];
    const category = _nsi.data[tkv];
    const properties = category.properties || {};

@@ -488,31 +492,31 @@ function _upgradeTags(tags, loc) {
    // Do the tag upgrade
    Object.assign(newTags, item.tags, keepTags);

-    // Special `branch` splitting rule - IF..
-    // - we are suggesting to replace `name`, AND
+    // Special `branch` splitting rules - IF..
+    // - NSI is suggesting to replace `name`, AND
    // - `branch` doesn't already contain something, AND
    // - original name has not moved to an alternate name (e.g. "Dunkin' Donuts" -> "Dunkin'"), AND
-    // - original name is just "some name" + "some stuff", THEN
+    // - original name is "some name" + "some stuff", THEN
    // consider splitting `name` into `name`/`branch`..
    const origName = tags.name;
    const newName = newTags.name;
    if (newName && origName && newName !== origName && !newTags.branch) {
      const newNames = gatherNames(newTags);
      const newSet = new Set([...newNames.primary, ...newNames.alternate]);
-      const isMoved = newSet.has(origName);
+      const isMoved = newSet.has(origName);   // another tag holds the original name now
+
      if (!isMoved) {
-        // Try the new names, longest to shortest, to match them into a "Name Branch" pattern.
-        const candidates = Array.from(newSet).sort((a, b) => b.length - a.length);
-        for (let j = 0; j < candidates.length; j++) {
-          const n = escapeRegex(candidates[j]);
-          const re = new RegExp(`^${n}\\s(.+)$`, 'i');  // e.g. "Tesco Canary Wharf"
-          const captured = origName.match(re);
-          if (captured) {
-            const branch = captured[1].trim();
-            if (branch) {
-              newTags.branch = captured[1];
-              break;
-            }
+        // Test name fragments, longest to shortest, to match them into a "Name Branch" pattern.
+        // e.g. "TUI ReiseCenter - Neuss Innenstadt" -> ["TUI", "ReiseCenter", "Neuss", "Innenstadt"]
+        const nameParts = origName.split(/[\s\-,.]/);
+        for (let split = nameParts.length - 1; split > 0; split--) {
+          const name = nameParts.slice(0, split).join(' ');  // e.g. "TUI ReiseCenter"
+          const branch = nameParts.slice(split).join(' ');   // e.g. "Neuss Innenstadt"
+          const hits = _nsi.matcher.match(k, v, name, loc);
+          if (!hits || !hits.length) continue;             // no match, try next name fragment
+          if (hits.some(hit => hit.itemID === itemID)) {   // matched the same item as above to a name fragment
+            newTags.branch = branch;
+            break;
          }
        }
      }