diff --git a/modules/core/context.js b/modules/core/context.js index b9bfe958c..27d074be3 100644 --- a/modules/core/context.js +++ b/modules/core/context.js @@ -557,7 +557,7 @@ export function coreContext() { Object.values(services).forEach(service => { if (service && typeof service.init === 'function') { - service.init(); + service.init(context); } }); @@ -579,30 +579,9 @@ export function coreContext() { _ui.ensureLoaded() .then(() => { _photos.init(); - loadNSIPresets(); }); } } - - - function loadNSIPresets() { - return Promise.all([ - fileFetcher.get('nsi_presets'), - fileFetcher.get('nsi_features') - ]) - .then(vals => { - // Add `suggestion=true` to all the nsi presets - // The preset json schema doesn't include it, but the iD code still uses it - Object.values(vals[0].presets).forEach(preset => preset.suggestion = true); - - presetManager.merge({ - presets: vals[0].presets, - featureCollection: vals[1] - }); - }) - .catch(() => { /* ignore */ }); - } - }; return context; diff --git a/modules/core/file_fetcher.js b/modules/core/file_fetcher.js index 07a88e484..8ab057d3e 100644 --- a/modules/core/file_fetcher.js +++ b/modules/core/file_fetcher.js @@ -19,14 +19,6 @@ export function coreFileFetcher() { 'keepRight': 'data/keepRight.min.json', 'languages': 'data/languages.min.json', 'locales': 'locales/index.min.json', - - 'nsi_presets': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/presets/nsi-id-presets.min.json', - 'nsi_data': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/nsi.min.json', - 'nsi_features': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/featureCollection.min.json', - 'nsi_generics': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/genericWords.min.json', - 'nsi_replacements': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/replacements.min.json', - 'nsi_trees': 
'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/trees.min.json', - 'oci_features': 'https://cdn.jsdelivr.net/npm/osm-community-index@3/dist/featureCollection.min.json', 'oci_resources': 'https://cdn.jsdelivr.net/npm/osm-community-index@3/dist/resources.min.json', 'preset_categories': 'https://cdn.jsdelivr.net/npm/@openstreetmap/id-tagging-schema@3/dist/preset_categories.min.json', diff --git a/modules/services/index.js b/modules/services/index.js index ab9aa5503..37bb4075e 100644 --- a/modules/services/index.js +++ b/modules/services/index.js @@ -4,6 +4,7 @@ import serviceOsmose from './osmose'; import serviceMapillary from './mapillary'; import serviceMapRules from './maprules'; import serviceNominatim from './nominatim'; +import serviceNsi from './nsi'; import serviceOpenstreetcam from './openstreetcam'; import serviceOsm from './osm'; import serviceOsmWikibase from './osm_wikibase'; @@ -14,36 +15,38 @@ import serviceWikidata from './wikidata'; import serviceWikipedia from './wikipedia'; -export var services = { - geocoder: serviceNominatim, - keepRight: serviceKeepRight, - improveOSM: serviceImproveOSM, - osmose: serviceOsmose, - mapillary: serviceMapillary, - openstreetcam: serviceOpenstreetcam, - osm: serviceOsm, - osmWikibase: serviceOsmWikibase, - maprules: serviceMapRules, - streetside: serviceStreetside, - taginfo: serviceTaginfo, - vectorTile: serviceVectorTile, - wikidata: serviceWikidata, - wikipedia: serviceWikipedia +export let services = { + geocoder: serviceNominatim, + keepRight: serviceKeepRight, + improveOSM: serviceImproveOSM, + osmose: serviceOsmose, + mapillary: serviceMapillary, + nsi: serviceNsi, + openstreetcam: serviceOpenstreetcam, + osm: serviceOsm, + osmWikibase: serviceOsmWikibase, + maprules: serviceMapRules, + streetside: serviceStreetside, + taginfo: serviceTaginfo, + vectorTile: serviceVectorTile, + wikidata: serviceWikidata, + wikipedia: serviceWikipedia }; export { - serviceKeepRight, - 
serviceImproveOSM, - serviceOsmose, - serviceMapillary, - serviceMapRules, - serviceNominatim, - serviceOpenstreetcam, - serviceOsm, - serviceOsmWikibase, - serviceStreetside, - serviceTaginfo, - serviceVectorTile, - serviceWikidata, - serviceWikipedia -}; \ No newline at end of file + serviceKeepRight, + serviceImproveOSM, + serviceOsmose, + serviceMapillary, + serviceMapRules, + serviceNominatim, + serviceNsi, + serviceOpenstreetcam, + serviceOsm, + serviceOsmWikibase, + serviceStreetside, + serviceTaginfo, + serviceVectorTile, + serviceWikidata, + serviceWikipedia +}; diff --git a/modules/services/nsi.js b/modules/services/nsi.js new file mode 100644 index 000000000..7ae599110 --- /dev/null +++ b/modules/services/nsi.js @@ -0,0 +1,528 @@ +import { matcher as Matcher } from 'name-suggestion-index'; + +import { fileFetcher, locationManager } from '../core'; +import { presetManager } from '../presets'; + +// This service contains all the code related to the **name-suggestion-index** (aka NSI) +// NSI contains the most correct tagging for many commonly mapped features. +// See https://github.com/osmlab/name-suggestion-index and https://nsi.guide + + +// DATA + +let _nsiStatus = 'loading'; // 'loading', 'ok', 'failed' +let _nsi = {}; + +// Sometimes we can upgrade a feature tagged like `building=yes` to a better tag. +const buildingPreset = { + 'building/commercial': true, + 'building/government': true, + 'building/hotel': true, + 'building/retail': true, + 'building/office': true, + 'building/supermarket': true, + 'building/yes': true +}; + +// There are a few exceptions to the namelike regexes. +// Usually a tag suffix contains a language code like `name:en`, `name:ru` +// but we want to exclude things like `operator:type`, `name:etymology`, etc.. +const notNames = /:(colou?r|type|forward|backward|left|right|etymology|pronunciation|wikipedia)$/i; + + +// PRIVATE FUNCTIONS + +// `setNsiSources()` +// Adds the sources to iD's filemap so we can start downloading data. 
//
function setNsiSources() {
  // Remote locations of the name-suggestion-index files, keyed by the id later passed to `fileFetcher.get()`.
  const nsiSources = {
    'nsi_presets': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/presets/nsi-id-presets.min.json',
    'nsi_data': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/nsi.min.json',
    'nsi_features': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/featureCollection.min.json',
    'nsi_generics': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/genericWords.min.json',
    'nsi_replacements': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/replacements.min.json',
    'nsi_trees': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/trees.min.json'
  };

  // Copy every entry onto the object returned by `fileFetcher.fileMap()`.
  const fileMap = fileFetcher.fileMap();
  Object.keys(nsiSources).forEach(id => {
    fileMap[id] = nsiSources[id];
  });
}


// `loadNsiPresets()`
//  Downloads the NSI presets and the NSI feature collection, then merges both into the presetManager.
//
// Returns
//   `Promise` fulfilled (with no value) once the merge has been requested
//
function loadNsiPresets() {
  const downloads = [
    fileFetcher.get('nsi_presets'),
    fileFetcher.get('nsi_features')
  ];

  return Promise.all(downloads).then(results => {
    const presets = results[0].presets;
    const featureCollection = results[1];

    // The preset json schema has no `suggestion` property, but the iD code still relies on it,
    // so flag every NSI preset here.
    Object.values(presets).forEach(preset => {
      preset.suggestion = true;
    });

    presetManager.merge({ presets: presets, featureCollection: featureCollection });
  });
}


// `loadNsiData()`
//  Downloads the remaining NSI files, stores them in the `_nsi` cache,
//  builds the matcher indexes and the lookup Maps used by the matching code.
//
// Returns
//   `Promise` fulfilled (with no value) once the cache has been populated
//
function loadNsiData() {
  const downloads = [
    fileFetcher.get('nsi_data'),
    fileFetcher.get('nsi_replacements'),
    fileFetcher.get('nsi_trees')
  ];

  return Promise.all(downloads).then(results => {
    // The cache object is published first and then filled in below.
    _nsi = {
      data:          results[0].nsi,            // the raw name-suggestion-index data
      replacements:  results[1].replacements,   // trivial old->new qid replacements
      trees:         results[2].trees,          // metadata about trees, main tags
      kvt:           new Map(),                 // Map (k -> Map (v -> t) )
      qids:          new Map(),                 // Map (wd/wp tag values -> qids)
      ids:           new Map()                  // Map (id -> NSI item)
    };

    const matcher = Matcher();
    _nsi.matcher = matcher;
    matcher.buildMatchIndex(_nsi.data);
    matcher.buildLocationIndex(_nsi.data, locationManager.loco());

    for (const tkv of Object.keys(_nsi.data)) {
      const category = _nsi.data[tkv];
      const segments = tkv.split('/', 3);     // tkv = "tree/key/value"
      const t = segments[0];
      const k = segments[1];
      const v = segments[2];

      // Reverse index of keys -> values -> trees present in the name-suggestion-index.
      // The keys are the primary keys (e.g. "amenity", "craft", "shop", "man_made", "route", etc)
      // "amenity": {
      //   "restaurant": "brands"
      // }
      if (!_nsi.kvt.has(k)) {
        _nsi.kvt.set(k, new Map());
      }
      _nsi.kvt.get(k).set(v, t);

      const mainTag = _nsi.trees[t].mainTag;  // tree is e.g. "brands"; mainTag e.g. "brand:wikidata"

      (category.items || []).forEach(item => {
        // Remember some useful things for later, cache NSI id -> item
        item.tkv = tkv;
        item.mainTag = mainTag;
        _nsi.ids.set(item.id, item);

        // Cache Wikidata/Wikipedia values -> qid, for #6416
        const wd = item.tags[mainTag];
        const wp = item.tags[mainTag.replace('wikidata', 'wikipedia')];
        if (wd) {
          _nsi.qids.set(wd, wd);
          if (wp) _nsi.qids.set(wp, wd);
        }
      });
    }
  });
}


// `gatherKVs()`
// Gather all the k/v pairs that we will run through the NSI matcher.
// An OSM tags object can contain anything, but only a few tags will be interesting to NSI.
//
// This function will return the interesting tag pairs like:
//   "amenity/restaurant", "man_made/flagpole"
// and fallbacks like
//   "amenity/yes"
// excluding things like
//   "highway", "surface", "ref", etc.
//
// Arguments
//   `tags`: `Object` containing the feature's OSM tags
// Returns
//   `Object` containing kv pairs to test:
//   {
//     'primary': Set(),
//     'alternate': Set()
//   }
//   Both Sets are empty when the NSI cache has not been built.
//
function gatherKVs(tags) {
  let primary = new Set();
  let alternate = new Set();

  // `_nsi` is still `{}` when the NSI data never finished loading (e.g. a download failed).
  // The validations only hold off while the status is 'loading', so they do call in here
  // after a failure - report "nothing to test" instead of throwing on a missing index.
  const kvt = _nsi.kvt;
  if (!kvt) {
    return { primary: primary, alternate: alternate };
  }

  Object.keys(tags).forEach(osmkey => {
    const osmvalue = tags[osmkey];
    if (!osmvalue) return;
    if (!kvt.has(osmkey)) return;     // not a primary key known to NSI

    primary.add(`${osmkey}/${osmvalue}`);
    alternate.add(`${osmkey}/yes`);
  });

  // Can we try a generic building fallback match? - See #6122, #7197
  // Only try this if we do a preset match and find nothing else remarkable about that building.
  // For example, a way with `building=yes` + `name=Westfield` may be a Westfield department store.
  // But a way with `building=yes` + `name=Westfield` + `public_transport=station` is a train station for a town named "Westfield"
  const preset = presetManager.matchTags(tags, 'area');
  if (buildingPreset[preset.id]) alternate.add('building/yes');

  return {
    primary: primary,
    alternate: alternate
  };
}


// `gatherNames()`
// Gather all the namelike values that we will run through the NSI matcher.
// It will gather values primarily from tags `name`, `name:ru`, `flag:name`
//  and fallback to alternate tags like `brand`, `brand:ru`, `alt_name`
//
// If ANY namelike value contains a semicolon (a multi-value tag), nothing is returned
//  for matching: both Sets come back empty.
//
// Arguments
//   `tags`: `Object` containing the feature's OSM tags
// Returns
//   `Object` containing namelike values to test:
//   {
//     'primary': Set(),
//     'alternate': Set()
//   }
//
function gatherNames(tags) {
  const empty = { primary: new Set(), alternate: new Set() };
  let primary = new Set();
  let alternate = new Set();
  let patterns;

  // Patterns for matching OSM keys that might contain namelike values.
  // These roughly correspond to the "trees" concept in name-suggestion-index,
  // but they can't be trees because there is overlap between different trees
  // (e.g. 'amenity/yes' could match something from the "brands" tree or the "operators" tree)
  if (tags.route) {
    patterns = {
      primary: /^network$/i,
      alternate: /^(operator|operator:\w+|network:\w+|\w+_name|\w+_name:\w+)$/i
    };
  } else if (tags.man_made === 'flagpole') {
    patterns = {
      primary: /^(flag:name|flag:name:\w+)$/i,
      alternate: /^(flag|flag:\w+|subject|subject:\w+)$/i  // note: no `country`, we special-case it below
    };
  } else {
    patterns = {
      primary: /^(name|name:\w+)$/i,
      // NOTE(review): unlike the patterns above, this one has no trailing `$`, so it also accepts
      // keys that merely start with one of these alternatives - confirm whether that is intended.
      alternate: /^(brand|brand:\w+|operator|operator:\w+|\w+_name|\w+_name:\w+)/i,
    };
  }

  // A plain loop (not `forEach`) so that `return empty` really leaves `gatherNames()`.
  // Inside a `forEach` callback the `return` would only end that one iteration.
  for (const osmkey of Object.keys(tags)) {
    const osmvalue = tags[osmkey];
    if (!osmvalue) continue;

    if (isNamelike(osmkey, 'primary')) {
      if (/;/.test(osmvalue)) return empty;    // bail out if any namelike value contains a semicolon
      primary.add(osmvalue);
    } else if (isNamelike(osmkey, 'alternate')) {
      if (/;/.test(osmvalue)) return empty;    // bail out if any namelike value contains a semicolon
      alternate.add(osmvalue);
    }
  }

  // For flags only, fallback to `country` tag only if no other namelike values were found.
  // See https://github.com/openstreetmap/iD/pull/8305#issuecomment-769174070
  if (tags.man_made === 'flagpole' && !primary.size && !alternate.size && !!tags.country) {
    const osmvalue = tags.country;
    if (/;/.test(osmvalue)) return empty;    // bail out if any namelike value contains a semicolon
    alternate.add(osmvalue);
  }

  return {
    primary: primary,
    alternate: alternate
  };

  // A key is namelike when it matches the selected pattern and is not one of the `notNames` exceptions.
  function isNamelike(osmkey, which) {
    return patterns[which].test(osmkey) && !notNames.test(osmkey);
  }
}


// `gatherTuples()`
// Generate all combinations of key,value,name that we want to test.
// This prioritizes them so that the primary name and k/v pairs go first
//
// Arguments
//   `tryKVs`: `Object` containing primary and alternate k/v pairs to test
//   `tryNames`: `Object` containing primary and alternate names to test
// Returns
//   `Array`: tuple objects `{ k, v, n }` ordered by priority
//
function gatherTuples(tryKVs, tryNames) {
  let tuples = [];
  ['primary', 'alternate'].forEach(whichName => {
    tryNames[whichName].forEach(n => {
      ['primary', 'alternate'].forEach(whichKV => {
        tryKVs[whichKV].forEach(kv => {
          // Split at the FIRST slash only. The key part never contains a slash, but an OSM value may,
          // and `kv.split('/', 2)` would silently drop everything after the value's first slash.
          const slash = kv.indexOf('/');
          const k = (slash === -1) ? kv : kv.slice(0, slash);
          const v = (slash === -1) ? undefined : kv.slice(slash + 1);
          tuples.push({ k: k, v: v, n: n });
        });
      });
    });
  });
  return tuples;
}


// `_upgradeTags()`
// Try to match a feature to a canonical record in name-suggestion-index
// and upgrade the tags to match.
//
// Arguments
//   `tags`: `Object` containing the feature's OSM tags
//   `loc`: Location where this feature exists, as a [lon, lat]
// Returns
//   `Object`: The tags the feature should have, or `null` if no changes needed
//   (also `null` when the NSI cache has not been built)
//
function _upgradeTags(tags, loc) {
  // `_nsi` stays `{}` when the NSI data never finished loading, and `matcher` is only attached
  // by `loadNsiData()`. Callers only hold off while the status is 'loading', so they also call
  // in after a failed load - suggest nothing rather than throw.
  if (!_nsi.matcher) return null;

  let newTags = Object.assign({}, tags);  // shallow copy, the input is never modified
  let changed = false;
  const replacements = _nsi.replacements || {};

  // Before anything, perform trivial Wikipedia/Wikidata replacements
  Object.keys(newTags).forEach(osmkey => {
    const matchTag = osmkey.match(/^(\w+:)?wikidata$/);
    if (!matchTag) return;                          // Only look at '*:wikidata' tags

    const prefix = (matchTag[1] || '');
    const wd = newTags[osmkey];
    const replace = replacements[wd];               // If it matches a QID in the replacement list...
    if (!replace) return;

    if (replace.wikidata !== undefined) {           // replace or delete `*:wikidata` tag
      changed = true;
      if (replace.wikidata) {
        newTags[osmkey] = replace.wikidata;
      } else {
        delete newTags[osmkey];
      }
    }
    if (replace.wikipedia !== undefined) {          // replace or delete `*:wikipedia` tag
      changed = true;
      const wpkey = `${prefix}wikipedia`;
      if (replace.wikipedia) {
        newTags[wpkey] = replace.wikipedia;
      } else {
        delete newTags[wpkey];
      }
    }
  });


  // Gather key/value tag pairs to try to match
  const tryKVs = gatherKVs(tags);
  if (!tryKVs.primary.size && !tryKVs.alternate.size) return changed ? newTags : null;

  // Gather namelike tag values to try to match
  const tryNames = gatherNames(tags);

  // Do `wikidata=*` or `wikipedia=*` tags identify this entity as a chain? - See #6416
  // If so, these tags can be swapped to e.g. `brand:wikidata`/`brand:wikipedia`.
  const foundQID = _nsi.qids.get(tags.wikidata) || _nsi.qids.get(tags.wikipedia);
  if (foundQID) tryNames.primary.add(foundQID);  // matcher will recognize the Wikidata QID as name too

  if (!tryNames.primary.size && !tryNames.alternate.size) return changed ? newTags : null;

  // Order the [key,value,name] tuples - test primary before alternate
  const tuples = gatherTuples(tryKVs, tryNames);

  for (let i = 0; i < tuples.length; i++) {
    const tuple = tuples[i];
    const hits = _nsi.matcher.match(tuple.k, tuple.v, tuple.n, loc);  // Attempt to match an item in NSI

    if (!hits || !hits.length) continue;  // no match, try next tuple
    if (hits[0].match !== 'primary' && hits[0].match !== 'alternate') continue;  // a generic match, try next tuple

    // A match may contain multiple results, the first one is likely the best one for this location
    // e.g. `['pfk-a54c14', 'kfc-1ff19c', 'kfc-658eea']`
    let itemID, item;
    for (let j = 0; j < hits.length; j++) {
      const hit = hits[j];
      itemID = hit.itemID;
      item = _nsi.ids.get(itemID);
      if (!item) continue;
      const mainTag = item.mainTag;               // e.g. `brand:wikidata`
      const itemQID = item.tags[mainTag];         // e.g. `brand:wikidata` qid
      const notQID = newTags[`not:${mainTag}`];   // e.g. `not:brand:wikidata` qid

      if (                                        // Exceptions, skip this hit
        (!itemQID || itemQID === notQID) ||       // no `*:wikidata` or matched a `not:*:wikidata`
        (newTags.office && !item.tags.office)     // feature may be a corporate office for a brand? - #6416
      ) {
        item = null;
        continue;  // continue looking
      } else {
        break;     // use `item`
      }
    }

    // can't use any of these hits, try next tuple
    if (!item) continue;

    // At this point we have matched a canonical item and can suggest tag upgrades..
    const tkv = item.tkv;
    const category = _nsi.data[tkv];
    const properties = category.properties || {};

    // Preserve some tags that we specifically don't want NSI to overwrite. ('^name', sometimes)
    const preserveTags = item.preserveTags || properties.preserveTags || [];
    let regexes = preserveTags.map(s => new RegExp(s, 'i'));
    regexes.push(/^building$/i, /^takeaway$/i);

    let keepTags = {};
    Object.keys(newTags).forEach(osmkey => {
      if (regexes.some(regex => regex.test(osmkey))) {
        keepTags[osmkey] = newTags[osmkey];
      }
    });

    // Remove any primary tags ("amenity", "craft", "shop", "man_made", "route", etc)
    _nsi.kvt.forEach((v, k) => delete newTags[k]);

    // Replace mistagged `wikidata`/`wikipedia` with e.g. `brand:wikidata`/`brand:wikipedia`
    if (foundQID) {
      delete newTags.wikipedia;
      delete newTags.wikidata;
    }

    // NSI item tags win over what remains; the preserved tags win over both.
    Object.assign(newTags, item.tags, keepTags);
    return newTags;
  }

  return changed ? newTags : null;
}


// `_isGenericName()`
// Is the `name` tag generic?
+// +// Arguments +// `tags`: `Object` containing the feature's OSM tags +// Returns +// `true` if it is generic, `false` if not +// +function _isGenericName(tags) { + const n = tags.name; + if (!n) return false; + + // tryNames just contains the `name` tag value and nothing else + const tryNames = { primary: new Set([n]), alternate: new Set() }; + + // Gather key/value tag pairs to try to match + const tryKVs = gatherKVs(tags); + if (!tryKVs.primary.size && !tryKVs.alternate.size) return false; + + // Order the [key,value,name] tuples - test primary before alternate + const tuples = gatherTuples(tryKVs, tryNames); + + for (let i = 0; i < tuples.length; i++) { + const tuple = tuples[i]; + const hits = _nsi.matcher.match(tuple.k, tuple.v, tuple.n); // Attempt to match an item in NSI + + // If we get a `excludeGeneric` hit, this is a generic name. + if (hits && hits.length && hits[0].match === 'excludeGeneric') return true; + } + + return false; +} + + + +// PUBLIC INTERFACE + +export default { + + // `init()` + // On init, start preparing the name-suggestion-index + // + init: () => { + // Note: service.init is called immediately after the presetManager has started loading its data. + // We expect to chain onto an unfulfilled promise here. + setNsiSources(); + presetManager.ensureLoaded() + .then(() => loadNsiPresets()) + .then(() => delay(100)) // wait briefly for locationSets to enter the locationManager queue + .then(() => locationManager.mergeLocationSets([])) // wait for locationSets to resolve + .then(() => loadNsiData()) + .then(() => _nsiStatus = 'ok') + .catch(() => _nsiStatus = 'failed'); + + function delay(msec) { + return new Promise(resolve => { + window.setTimeout(resolve, msec); + }); + } + }, + + + // `reset()` + // Reset is called when user saves data to OSM (does nothing here) + // + reset: () => {}, + + + // `status()` + // To let other code know how it's going... 
+ // + // Returns + // `String`: 'loading', 'ok', 'failed' + // + status: () => _nsiStatus, + + + // `isGenericName()` + // Is the `name` tag generic? + // + // Arguments + // `tags`: `Object` containing the feature's OSM tags + // Returns + // `true` if it is generic, `false` if not + // + isGenericName: (tags) => _isGenericName(tags), + + + // `upgradeTags()` + // Suggest tag upgrades. + // This function will not modify the input tags, it makes a copy. + // + // Arguments + // `tags`: `Object` containing the feature's OSM tags + // `loc`: Location where this feature exists, as a [lon, lat] + // Returns + // `Object`: The tags the the feature should have, or `null` if no change + // + upgradeTags: (tags, loc) => _upgradeTags(tags, loc), + + + // `cache()` + // Direct access to the NSI cache, useful for testing or breaking things + // + // Returns + // `Object`: the internal NSI cache + // + cache: () => _nsi +}; diff --git a/modules/validations/outdated_tags.js b/modules/validations/outdated_tags.js index 5cf65282f..a8d1cc54f 100644 --- a/modules/validations/outdated_tags.js +++ b/modules/validations/outdated_tags.js @@ -1,22 +1,20 @@ import { t } from '../core/localizer'; -import { matcher as Matcher } from 'name-suggestion-index'; -import { fileFetcher, locationManager } from '../core'; import { actionChangePreset } from '../actions/change_preset'; import { actionChangeTags } from '../actions/change_tags'; import { actionUpgradeTags } from '../actions/upgrade_tags'; +import { fileFetcher } from '../core'; import { presetManager } from '../presets'; +import { services } from '../services'; import { osmIsOldMultipolygonOuterMember, osmOldMultipolygonOuterMemberOfRelation } from '../osm/multipolygon'; -import { utilArrayUniq, utilDisplayLabel, utilHashcode, utilTagDiff } from '../util'; +import { utilDisplayLabel, utilHashcode, utilTagDiff } from '../util'; import { validationIssue, validationIssueFix } from '../core/validation'; export function 
validationOutdatedTags() { const type = 'outdated_tags'; let _waitingForDeprecated = true; - let _waitingForNSI = true; let _dataDeprecated; - let _nsi; // fetch deprecated tags fileFetcher.get('deprecated') @@ -25,164 +23,6 @@ export function validationOutdatedTags() { .finally(() => _waitingForDeprecated = false); - function delay(msec) { - return new Promise(resolve => { - window.setTimeout(resolve, msec); - }); - } - - // This Promise will fulfill after NSI presets are loaded and locations merged into the locationManager. - function waitForNSIPresets() { - return Promise.all([ - fileFetcher.get('nsi_presets'), - fileFetcher.get('nsi_features') - ]) - .then(() => delay(1000)) // wait 1 sec for locationSets to enter the locationManager queue - .then(() => locationManager.mergeLocationSets([]) ); - } - - // Fetch the name-suggestion-index data - waitForNSIPresets() - .then(() => Promise.all([ - fileFetcher.get('nsi_data'), - fileFetcher.get('nsi_replacements'), - fileFetcher.get('nsi_trees') - ])) - .then(vals => { - if (_nsi) return _nsi; - - _nsi = { - data: vals[0].nsi, // the raw name-suggestion-index data - replacements: vals[1].replacements, // trivial old->new qid replacements - trees: vals[2].trees, // metadata about trees, main tags - keys: new Set(), // primary osm keys to check for a NSI match - qids: new Map(), // Map wd/wp tag values -> qids - ids: new Map() // Map id -> NSI item - }; - - _nsi.matcher = Matcher(); - _nsi.matcher.buildMatchIndex(_nsi.data); - _nsi.matcher.buildLocationIndex(_nsi.data, locationManager.loco()); - - Object.keys(_nsi.data).forEach(tkv => { - const parts = tkv.split('/', 3); // tkv = "tree/key/value" - const t = parts[0]; - const k = parts[1]; - - // Collect primary keys (e.g. "amenity", "craft", "shop", "man_made", "route", etc) - _nsi.keys.add(k); - - const tree = _nsi.trees[t]; // e.g. "brands", "operators" - const mainTag = tree.mainTag; // e.g. 
"brand:wikidata", "operator:wikidata", etc - - const items = _nsi.data[tkv] || []; - items.forEach(item => { - // Cache NSI ids and main tags - item.mainTag = mainTag; - _nsi.ids.set(item.id, item); - - // Cache Wikidata/Wikipedia values, for #6416 - const wd = item.tags[mainTag]; - const wp = item.tags[mainTag.replace('wikidata', 'wikipedia')]; - if (wd) _nsi.qids.set(wd, wd); - if (wp && wd) _nsi.qids.set(wp, wd); - }); - }); - - _nsi.keys.add('building'); // fallback can match building=* for some categories - return _nsi; - }) - .catch(() => { /* ignore */ }) - .finally(() => _waitingForNSI = false); - - - // Patterns for matching OSM keys that might contain namelike values. - // These roughly correspond to the "trees" concept in name-suggestion-index, - // but they can't be trees because there is overlap between different trees - // (e.g. 'amenity/yes' could match something from the "brands" tree or the "operators" tree) - const namePatterns = { - transit: { - primary: [ - /^network$/i - ], - alternate: [ - /^(operator|network)(:\\w+)?$/i, // `network:guid`, `network:short`, etc. - /^(\\w+)_name(:\\w+)?$/i - ] - }, - flags: { - primary: [ - /^flag:name(:\\w+)?$/i // `flag:name`, poss. w/ lang suffix - ], - alternate: [ - /^(flag|subject)(:\\w+)?$/i // note: no `country`, we special-case it in gatherNames - ] - }, - pois: { - primary: [ - /^name(:\\w+)?$/i // `name`, poss. w/ lang suffix - ], - alternate: [ - /^(brand|operator)(:\\w+)?$/i, // `brand` or `operator`, poss. w/ lang suffix - /^(\\w+)_name(:\\w+)?$/i // `alt_name`, `short_name`, `official_name`, poss. w/ lang suffix - ] - }, - }; - - // There are a few exceptions to the namelike regexes. - // Usually a tag suffix contains a language code like `name:en`, `name:ru` - // but we want to exclude things like `operator:type`, `name:etymology`, etc.. 
- const notNames = /:(colour|type|left|right|etymology|pronunciation|wikipedia)$/i; - - - // Gather all the namelike values that we will run through the NSI matcher - function gatherNames(tags) { - let names = []; - let patterns; - - if (tags.route) { - patterns = namePatterns.transit; - } else if (tags.man_made === 'flagpole') { - patterns = namePatterns.flags; - } else { - patterns = namePatterns.pois; - } - - let osmkeys = Object.keys(tags); - for (let j = 0; j < osmkeys.length; j++) { - const k = osmkeys[j]; - const v = tags[k]; - if (!v) continue; - - if (isNamelike(k, 'primary')) { - if (/;/.test(v)) return []; // bail out if any namelike value contains a semicolon - names.unshift(v); // primary names at the beginning of the list - } - else if (isNamelike(k, 'alternate')) { - if (/;/.test(v)) return []; // bail out if any namelike value contains a semicolon - names.push(v); // alternate names at the end of the list - } - } - - names = utilArrayUniq(names); - - // For flags only, fallback to `country` tag only if no other namelike values were found. - // See https://github.com/openstreetmap/iD/pull/8305#issuecomment-769174070 - if (tags.man_made === 'flagpole' && !names.length && !!tags.country) { - const v = tags.country; - if (/;/.test(v)) return []; // bail out if any namelike value contains a semicolon - names = [v]; - } - - return names; - - - function isNamelike(osmkey, which) { - return patterns[which].some(regex => regex.test(osmkey) && !notNames.test(osmkey)); - } - } - - function oldTagIssues(entity, graph) { const oldTags = Object.assign({}, entity.tags); // shallow copy let preset = presetManager.match(entity, graph); @@ -224,143 +64,22 @@ export function validationOutdatedTags() { } // Attempt to match a canonical record in the name-suggestion-index. - // This index contains the most correct tagging for many commonly mapped features. 
- // See https://github.com/osmlab/name-suggestion-index and https://nsi.guide - if (_nsi) { - - // Perform trivial Wikipedia/Wikidata replacements - Object.keys(newTags).forEach(osmkey => { - const matchTag = osmkey.match(/^(\w+:)?wikidata$/); - if (matchTag) { // Look at '*:wikidata' tags - const prefix = (matchTag[1] || ''); - const wd = newTags[osmkey]; - const replace = _nsi.replacements[wd]; // If it matches a QID in the replacement list... - - if (replace && replace.wikidata !== undefined) { // replace or delete `*:wikidata` tag - if (replace.wikidata) { - newTags[osmkey] = replace.wikidata; - } else { - delete newTags[osmkey]; - } - } - if (replace && replace.wikipedia !== undefined) { // replace or delete `*:wikipedia` tag - const wpkey = `${prefix}wikipedia`; - if (replace.wikipedia) { - newTags[wpkey] = replace.wikipedia; - } else { - delete newTags[wpkey]; - } - } - } - }); - - // Do `wikidata` or `wikipedia` tags identify this entity as a chain? #6416 - // If so, these tags can be swapped to e.g. `brand:wikidata`/`brand:wikipedia` below. - let foundQID = _nsi.qids.get(newTags.wikidata) || _nsi.qids.get(newTags.wikipedia); - - // We will only spend time to compute these things if it's necessary - let names, loc, match; - - // Try each primary key ("amenity", "craft", "shop", "man_made", "route", etc) - const nsiKeys = Array.from(_nsi.keys); - for (let i = 0; i < nsiKeys.length; i++) { - if (match) break; // matched already, stop looking - let k = nsiKeys[i]; - let v = newTags[k]; - if (!v) continue; - - // Only attempt a match on building/yes if there is nothing else remarkable about that building. 
- if (k === 'building') { - v = 'yes'; - if (preset.id !== 'building/yes') continue; // the feature matched a better preset - } - - if (!loc) { // collect location for this feature only once - loc = entity.extent(graph).center(); - } - - if (!names) { // collect names for this feature only once - names = gatherNames(newTags); - if (foundQID) names.push(foundQID); // matcher will recognize the QID as an alternate name too - } - - // Try each namelike value - for (let n = 0; n < names.length; n++) { - match = _nsi.matcher.match(k, v, names[n], loc); // Attempt to match an item in NSI - if (!match) continue; // keep looking - - // If we get here, there was a match.. - // A match may contain multiple results, the first one is the best one for this location - // e.g. `['pfk-a54c14', 'kfc-1ff19c', 'kfc-658eea']` - const itemID = match[0].itemID; - const item = _nsi.ids.get(itemID); - const mainTag = item.mainTag; // e.g. `brand:wikidata` - const itemQID = item.tags[mainTag]; // e.g. `brand:wikidata` qid - const notQID = newTags[`not:${mainTag}`]; // e.g. `not:brand:wikidata` qid - - // Exceptions, throw out the match - if ( - (!itemQID || itemQID === notQID) || // no `*:wikidata` or matched a `not:*:wikidata` - (newTags.office && !item.tags.office) // feature may be a corporate office for a brand? - #6416 - ) { - match = null; // forget match and keep looking - continue; // (it might make sense to stop looking, not sure) - } - - // We are keeping the match at this point + const nsi = services.nsi; + let waitingForNsi = false; + if (nsi) { + waitingForNsi = (nsi.status() === 'loading'); + if (!waitingForNsi) { + const loc = entity.extent(graph).center(); + const result = nsi.upgradeTags(newTags, loc); + if (result) { + newTags = result; subtype = 'noncanonical_brand'; - - // Keys that we don't want NSI to overwrite. - let keepKeys = [/^building$/i, /^flag:name$/i, /^takeaway$/i]; - - // Don't overwrite a `name` tag if this preset shows a `brand` or `operator` field. 
- // (For presets like hotels, car dealerships, post offices, the `name` should be left alone) - // see also similar logic in `localized.js` - const nsiPreset = presetManager.matchTags(item.tags, 'point'); // (the actual geometry doesn't matter) - if (nsiPreset) { - const fields = nsiPreset.fields(); - const showsBrandField = fields.some(d => d.id === 'brand'); - const showsOperatorField = fields.some(d => d.id === 'operator'); - const setsName = item.tags.name; - const setsBrandWikidata = item.tags['brand:wikidata']; - const setsOperatorWikidata = item.tags['operator:wikidata']; - - if (setsName && ( - (setsBrandWikidata && showsBrandField) || - (setsOperatorWikidata && showsOperatorField) - )) { - keepKeys.push(/^name(:\w+)?$/i); // `name`, `name:en`, etc - } - } - - // Preserve some tag values that we don't want NSI to overwrite. - let keepTags = {}; - Object.keys(newTags).forEach(k => { - if (keepKeys.some(pattern => pattern.test(k))) { - keepTags[k] = newTags[k]; - } - }); - - // Replace the primary tags with what's in NSI ("amenity", "craft", "shop", "man_made", "route", etc) - nsiKeys.forEach(k => delete newTags[k]); - // Replace `wikidata`/`wikipedia` with e.g. `brand:wikidata`/`brand:wikipedia` - if (foundQID) { - delete newTags.wikipedia; - delete newTags.wikidata; - } - - Object.assign(newTags, item.tags, keepTags); - break; // stop looking } } - - // maybe someday: match features without the location to determine - // if a feature appears somewhere in the world that it shouldn't. 
- - } // end if _nsi + } let issues = []; - issues.provisional = (_waitingForDeprecated || _waitingForNSI); + issues.provisional = (_waitingForDeprecated || waitingForNsi); // determine diff const tagDiff = utilTagDiff(oldTags, newTags); diff --git a/modules/validations/suspicious_name.js b/modules/validations/suspicious_name.js index 435bca2af..d4c8e30ed 100644 --- a/modules/validations/suspicious_name.js +++ b/modules/validations/suspicious_name.js @@ -1,8 +1,8 @@ -import { fileFetcher } from '../core/file_fetcher'; -import { t, localizer } from '../core/localizer'; -import { presetManager } from '../presets'; -import { validationIssue, validationIssueFix } from '../core/validation'; import { actionChangeTags } from '../actions/change_tags'; +import { presetManager } from '../presets'; +import { services } from '../services'; +import { t, localizer } from '../core/localizer'; +import { validationIssue, validationIssueFix } from '../core/validation'; export function validationSuspiciousName() { @@ -11,27 +11,23 @@ export function validationSuspiciousName() { 'aerialway', 'aeroway', 'amenity', 'building', 'craft', 'highway', 'leisure', 'railway', 'man_made', 'office', 'shop', 'tourism', 'waterway' ]; - let _dataGenerics; - let _waitingForGenerics = true; - - fileFetcher.get('nsi_generics') - .then(data => { - if (_dataGenerics) return _dataGenerics; - - // known list of generic names (e.g. "bar") - _dataGenerics = data.genericWords.map(pattern => new RegExp(pattern, 'i')); - return _dataGenerics; - }) - .catch(() => { /* ignore */ }) - .finally(() => _waitingForGenerics = false); + let _waitingForNsi = false; - function isDiscardedSuggestionName(lowercaseName) { - if (!_dataGenerics) return false; - return _dataGenerics.some(regex => regex.test(lowercaseName)); + // Attempt to match a generic record in the name-suggestion-index. 
+ function isGenericMatchInNsi(tags) { + const nsi = services.nsi; + if (nsi) { + _waitingForNsi = (nsi.status() === 'loading'); + if (!_waitingForNsi) { + return nsi.isGenericName(tags); + } + } + return false; } - // test if the name is just the key or tag value (e.g. "park") + + // Test if the name is just the key or tag value (e.g. "park") function nameMatchesRawTag(lowercaseName, tags) { for (let i = 0; i < keysToTestForGenericValues.length; i++) { let key = keysToTestForGenericValues[i]; @@ -51,7 +47,7 @@ export function validationSuspiciousName() { function isGenericName(name, tags) { name = name.toLowerCase(); - return nameMatchesRawTag(name, tags) || isDiscardedSuggestionName(name); + return nameMatchesRawTag(name, tags) || isGenericMatchInNsi(tags); } function makeGenericNameIssue(entityId, nameKey, genericName, langCode) { @@ -173,7 +169,7 @@ export function validationSuspiciousName() { } } if (isGenericName(value, tags)) { - issues.provisional = _waitingForGenerics; // retry later if we don't have the generics yet + issues.provisional = _waitingForNsi; // retry later if we are waiting on NSI to finish loading issues.push(makeGenericNameIssue(entity.id, key, value, langCode)); } } diff --git a/test/spec/spec_helpers.js b/test/spec/spec_helpers.js index cbf36c885..db97d4bf1 100644 --- a/test/spec/spec_helpers.js +++ b/test/spec/spec_helpers.js @@ -20,12 +20,6 @@ iD.fileFetcher.cache().preset_presets = {}; // Initializing `coreContext` initializes `_validator`, which tries loading: iD.fileFetcher.cache().deprecated = []; -iD.fileFetcher.cache().nsi_presets = { presets: {} }; -iD.fileFetcher.cache().nsi_data = { nsi: {} }; -iD.fileFetcher.cache().nsi_features = { type: 'FeatureCollection', features: [] }; -iD.fileFetcher.cache().nsi_generics = { genericWords: [] }; -iD.fileFetcher.cache().nsi_replacements = { replacements: {} }; -iD.fileFetcher.cache().nsi_trees = { trees: {} }; // Initializing `coreContext` initializes `_uploader`, which tries loading: 
iD.fileFetcher.cache().discarded = {}; diff --git a/test/spec/validations/suspicious_name.js b/test/spec/validations/suspicious_name.js index 7b6901a37..be0384ede 100644 --- a/test/spec/validations/suspicious_name.js +++ b/test/spec/validations/suspicious_name.js @@ -2,11 +2,38 @@ describe('iD.validations.suspicious_name', function () { var context; before(function() { - iD.fileFetcher.cache().nsi_generics = { genericWords: ['^stores?$'] }; + iD.services.nsi = iD.serviceNsi; + iD.fileFetcher.cache().nsi_presets = { presets: {} }; + iD.fileFetcher.cache().nsi_features = { type: 'FeatureCollection', features: [] }; + iD.fileFetcher.cache().nsi_replacements = { replacements: {} }; + + iD.fileFetcher.cache().nsi_trees = { + trees: { + brands: { + mainTag: 'brand:wikidata' + } + } + }; + iD.fileFetcher.cache().nsi_data = { + nsi: { + 'brands/shop/supermarket': { + properties: { + path: 'brands/shop/supermarket', + exclude: { + generic: ['^(mini|super)?\\s?(market|mart|mercado)( municipal)?$' ], + named: ['^(famiglia cooperativa|семейный)$'] + } + } + } + } + }; + iD.fileFetcher.cache().nsi_generics = { + genericWords: ['^stores?$'] + }; }); after(function() { - iD.fileFetcher.cache().nsi_generics = { genericWords: [] }; + delete iD.services.nsi; }); beforeEach(function() { @@ -86,8 +113,33 @@ describe('iD.validations.suspicious_name', function () { }, 20); }); - it('flags feature with a known generic name', function(done) { - createWay({ shop: 'supermarket', name: 'Store' }); + it('ignores feature matching excludeNamed pattern in name-suggestion-index', function(done) { + createWay({ shop: 'supermarket', name: 'famiglia cooperativa' }); + var validator = iD.validationSuspiciousName(context); + window.setTimeout(function() { // async, so data will be available + var issues = validate(validator); + expect(issues).to.have.lengthOf(0); + done(); + }, 20); + }); + + it('flags feature matching a excludeGeneric pattern in name-suggestion-index', function(done) { + createWay({ 
shop: 'supermarket', name: 'super mercado' }); + var validator = iD.validationSuspiciousName(context); + window.setTimeout(function() { // async, so data will be available + var issues = validate(validator); + expect(issues).to.have.lengthOf(1); + var issue = issues[0]; + expect(issue.type).to.eql('suspicious_name'); + expect(issue.subtype).to.eql('generic_name'); + expect(issue.entityIds).to.have.lengthOf(1); + expect(issue.entityIds[0]).to.eql('w-1'); + done(); + }, 20); + }); + + it('flags feature matching a global exclude pattern in name-suggestion-index', function(done) { + createWay({ shop: 'supermarket', name: 'store' }); var validator = iD.validationSuspiciousName(context); window.setTimeout(function() { // async, so data will be available var issues = validate(validator);