Move all of NSI into a service, rewrite matcher code

This commit is contained in:
Bryan Housel
2021-03-03 11:38:34 -05:00
parent 4d9336b1c7
commit 1f6a2121c4
8 changed files with 651 additions and 388 deletions
+1 -22
View File
@@ -557,7 +557,7 @@ export function coreContext() {
Object.values(services).forEach(service => {
if (service && typeof service.init === 'function') {
service.init();
service.init(context);
}
});
@@ -579,30 +579,9 @@ export function coreContext() {
_ui.ensureLoaded()
.then(() => {
_photos.init();
loadNSIPresets();
});
}
}
function loadNSIPresets() {
return Promise.all([
fileFetcher.get('nsi_presets'),
fileFetcher.get('nsi_features')
])
.then(vals => {
// Add `suggestion=true` to all the nsi presets
// The preset json schema doesn't include it, but the iD code still uses it
Object.values(vals[0].presets).forEach(preset => preset.suggestion = true);
presetManager.merge({
presets: vals[0].presets,
featureCollection: vals[1]
});
})
.catch(() => { /* ignore */ });
}
};
return context;
-8
View File
@@ -19,14 +19,6 @@ export function coreFileFetcher() {
'keepRight': 'data/keepRight.min.json',
'languages': 'data/languages.min.json',
'locales': 'locales/index.min.json',
'nsi_presets': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/presets/nsi-id-presets.min.json',
'nsi_data': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/nsi.min.json',
'nsi_features': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/featureCollection.min.json',
'nsi_generics': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/genericWords.min.json',
'nsi_replacements': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/replacements.min.json',
'nsi_trees': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/trees.min.json',
'oci_features': 'https://cdn.jsdelivr.net/npm/osm-community-index@3/dist/featureCollection.min.json',
'oci_resources': 'https://cdn.jsdelivr.net/npm/osm-community-index@3/dist/resources.min.json',
'preset_categories': 'https://cdn.jsdelivr.net/npm/@openstreetmap/id-tagging-schema@3/dist/preset_categories.min.json',
+33 -30
View File
@@ -4,6 +4,7 @@ import serviceOsmose from './osmose';
import serviceMapillary from './mapillary';
import serviceMapRules from './maprules';
import serviceNominatim from './nominatim';
import serviceNsi from './nsi';
import serviceOpenstreetcam from './openstreetcam';
import serviceOsm from './osm';
import serviceOsmWikibase from './osm_wikibase';
@@ -14,36 +15,38 @@ import serviceWikidata from './wikidata';
import serviceWikipedia from './wikipedia';
export var services = {
geocoder: serviceNominatim,
keepRight: serviceKeepRight,
improveOSM: serviceImproveOSM,
osmose: serviceOsmose,
mapillary: serviceMapillary,
openstreetcam: serviceOpenstreetcam,
osm: serviceOsm,
osmWikibase: serviceOsmWikibase,
maprules: serviceMapRules,
streetside: serviceStreetside,
taginfo: serviceTaginfo,
vectorTile: serviceVectorTile,
wikidata: serviceWikidata,
wikipedia: serviceWikipedia
export let services = {
geocoder: serviceNominatim,
keepRight: serviceKeepRight,
improveOSM: serviceImproveOSM,
osmose: serviceOsmose,
mapillary: serviceMapillary,
nsi: serviceNsi,
openstreetcam: serviceOpenstreetcam,
osm: serviceOsm,
osmWikibase: serviceOsmWikibase,
maprules: serviceMapRules,
streetside: serviceStreetside,
taginfo: serviceTaginfo,
vectorTile: serviceVectorTile,
wikidata: serviceWikidata,
wikipedia: serviceWikipedia
};
export {
serviceKeepRight,
serviceImproveOSM,
serviceOsmose,
serviceMapillary,
serviceMapRules,
serviceNominatim,
serviceOpenstreetcam,
serviceOsm,
serviceOsmWikibase,
serviceStreetside,
serviceTaginfo,
serviceVectorTile,
serviceWikidata,
serviceWikipedia
};
serviceKeepRight,
serviceImproveOSM,
serviceOsmose,
serviceMapillary,
serviceMapRules,
serviceNominatim,
serviceNsi,
serviceOpenstreetcam,
serviceOsm,
serviceOsmWikibase,
serviceStreetside,
serviceTaginfo,
serviceVectorTile,
serviceWikidata,
serviceWikipedia
};
+528
View File
@@ -0,0 +1,528 @@
import { matcher as Matcher } from 'name-suggestion-index';
import { fileFetcher, locationManager } from '../core';
import { presetManager } from '../presets';
// This service contains all the code related to the **name-suggestion-index** (aka NSI)
// NSI contains the most correct tagging for many commonly mapped features.
// See https://github.com/osmlab/name-suggestion-index and https://nsi.guide
// DATA
// Module-level state shared by every function in this service.

// `_nsiStatus` is reported to other code through `status()`.
// It starts as 'loading' and `init()` flips it to 'ok' or 'failed' once its promise chain settles.
let _nsiStatus = 'loading'; // 'loading', 'ok', 'failed'

// `_nsi` is the internal cache. It stays an empty object until `loadNsiData()` replaces it with
// `{ data, replacements, trees, kvt, qids, ids, matcher }`. It is exposed as-is through `cache()`.
let _nsi = {};
// Sometimes we can upgrade a feature tagged like `building=yes` to a better tag.
// The keys are preset ids: when a feature's tags match one of these presets (as an area),
// `gatherKVs()` adds `building/yes` as an alternate key/value pair to run through the matcher.
const buildingPreset = {
'building/commercial': true,
'building/government': true,
'building/hotel': true,
'building/retail': true,
'building/office': true,
'building/supermarket': true,
'building/yes': true
};
// There are a few exceptions to the namelike regexes.
// Usually a tag suffix contains a language code like `name:en`, `name:ru`
// but we want to exclude things like `operator:type`, `name:etymology`, etc..
// `gatherNames()` rejects any key matching this pattern, even when it matches a namelike pattern.
const notNames = /:(colou?r|type|forward|backward|left|right|etymology|pronunciation|wikipedia)$/i;
// PRIVATE FUNCTIONS
// `setNsiSources()`
//  Registers the name-suggestion-index download locations in iD's fileFetcher filemap,
//  so that the `nsi_*` files can be requested by key afterwards.
//  Entries already present in the filemap under the same keys are overwritten.
//
function setNsiSources() {
  const nsiFiles = {
    'nsi_presets': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/presets/nsi-id-presets.min.json',
    'nsi_data': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/nsi.min.json',
    'nsi_features': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/featureCollection.min.json',
    'nsi_generics': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/genericWords.min.json',
    'nsi_replacements': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/replacements.min.json',
    'nsi_trees': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/trees.min.json'
  };

  // Copy every entry onto the object returned by the fileFetcher
  Object.assign(fileFetcher.fileMap(), nsiFiles);
}
// `loadNsiPresets()`
//  Downloads the NSI presets and the NSI feature collection, then merges both into iD's presetManager.
//  Returns a Promise that fulfills (with no value) after the merge call has been made.
//  The Promise rejects if either download rejects - the caller decides how to handle that.
//
function loadNsiPresets() {
  const downloads = [
    fileFetcher.get('nsi_presets'),
    fileFetcher.get('nsi_features')
  ];

  return Promise.all(downloads).then(([presetFile, featureCollection]) => {
    const presets = presetFile.presets;

    // Flag every NSI preset with `suggestion=true`.
    // The preset json schema doesn't include it, but the iD code still uses it.
    for (const preset of Object.values(presets)) {
      preset.suggestion = true;
    }

    presetManager.merge({
      presets: presets,
      featureCollection: featureCollection
    });
  });
}
// `loadNsiData()`
//  Downloads the raw NSI data, the QID replacements and the tree metadata,
//  then rebuilds the `_nsi` cache (matcher + lookup indexes) from them.
//  Returns a Promise that fulfills (with no value) when the cache is ready.
//  The Promise rejects if a download rejects or if the data can't be processed.
//
function loadNsiData() {
  const downloads = [
    fileFetcher.get('nsi_data'),
    fileFetcher.get('nsi_replacements'),
    fileFetcher.get('nsi_trees')
  ];

  return Promise.all(downloads).then(([dataFile, replacementsFile, treesFile]) => {
    _nsi = {
      data: dataFile.nsi,                           // the raw name-suggestion-index data
      replacements: replacementsFile.replacements,  // trivial old->new qid replacements
      trees: treesFile.trees,                       // metadata about trees, main tags
      kvt: new Map(),                               // Map (k -> Map (v -> t) )
      qids: new Map(),                              // Map (wd/wp tag values -> qids)
      ids: new Map()                                // Map (id -> NSI item)
    };

    // The matcher needs both of its indexes built before it can be used
    _nsi.matcher = Matcher();
    _nsi.matcher.buildMatchIndex(_nsi.data);
    _nsi.matcher.buildLocationIndex(_nsi.data, locationManager.loco());

    for (const tkv of Object.keys(_nsi.data)) {
      const category = _nsi.data[tkv];
      const [t, k, v] = tkv.split('/', 3);   // tkv = "tree/key/value"

      // Reverse index of keys -> values -> trees present in the name-suggestion-index.
      // The keys are the primary keys (e.g. "amenity", "craft", "shop", "man_made", "route", etc)
      // e.g.  "amenity" -> ( "restaurant" -> "brands" )
      if (!_nsi.kvt.has(k)) {
        _nsi.kvt.set(k, new Map());
      }
      _nsi.kvt.get(k).set(v, t);

      const tree = _nsi.trees[t];       // e.g. "brands", "operators"
      const mainTag = tree.mainTag;     // e.g. "brand:wikidata", "operator:wikidata", etc

      for (const item of (category.items || [])) {
        // Annotate the item with where it came from, and cache NSI id -> item
        item.tkv = tkv;
        item.mainTag = mainTag;
        _nsi.ids.set(item.id, item);

        // Cache Wikidata/Wikipedia values -> qid, for #6416
        const wd = item.tags[mainTag];
        const wp = item.tags[mainTag.replace('wikidata', 'wikipedia')];
        if (wd) {
          _nsi.qids.set(wd, wd);
          if (wp) _nsi.qids.set(wp, wd);
        }
      }
    }
  });
}
// `gatherKVs()`
//  Collects the key/value pairs of a feature that are worth running through the NSI matcher.
//  An OSM tags object can contain anything, but only keys that exist in the
//  NSI key index (`_nsi.kvt`) are interesting here.
//
//  For every interesting tag this gathers the pair itself, like:
//    "amenity/restaurant", "man_made/flagpole"
//  and a generic fallback for the same key, like:
//    "amenity/yes"
//  Tags whose key is not in the index ("highway", "surface", "ref", etc.) are left out.
//
//  Arguments
//   `tags`: `Object` containing the feature's OSM tags
//  Returns
//   `Object` containing kv pairs to test:
//     {
//       'primary': Set(),
//       'alternate': Set()
//     }
//
function gatherKVs(tags) {
  const primary = new Set();
  const alternate = new Set();

  for (const [osmkey, osmvalue] of Object.entries(tags)) {
    if (!osmvalue) continue;                    // ignore empty values
    if (!_nsi.kvt.get(osmkey)) continue;        // not a key that NSI knows about

    primary.add(`${osmkey}/${osmvalue}`);
    alternate.add(`${osmkey}/yes`);
  }

  // Can we try a generic building fallback match? - See #6122, #7197
  // Only try this if the tags match one of the plain building presets, i.e. there is
  // nothing else remarkable about that building.
  // For example, a way with `building=yes` + `name=Westfield` may be a Westfield department store.
  // But a way with `building=yes` + `name=Westfield` + `public_transport=station` is a train station for a town named "Westfield"
  const matchedPreset = presetManager.matchTags(tags, 'area');
  if (buildingPreset[matchedPreset.id]) {
    alternate.add('building/yes');
  }

  return { primary, alternate };
}
// `gatherNames()`
//  Gather all the namelike values that we will run through the NSI matcher.
//  It will gather values primarily from tags `name`, `name:ru`, `flag:name`
//  and fallback to alternate tags like `brand`, `brand:ru`, `alt_name`
//
//  If ANY namelike value contains a semicolon (i.e. a multi-valued tag), nothing is
//  gathered at all and both returned Sets are empty.
//
//  Arguments
//   `tags`: `Object` containing the feature's OSM tags
//  Returns
//   `Object` containing namelike values to test:
//     {
//       'primary': Set(),
//       'alternate': Set()
//     }
//
function gatherNames(tags) {
  const empty = { primary: new Set(), alternate: new Set() };
  const primary = new Set();
  const alternate = new Set();
  let patterns;

  // Patterns for matching OSM keys that might contain namelike values.
  // These roughly correspond to the "trees" concept in name-suggestion-index,
  // but they can't be trees because there is overlap between different trees
  // (e.g. 'amenity/yes' could match something from the "brands" tree or the "operators" tree)
  if (tags.route) {
    patterns = {
      primary: /^network$/i,
      alternate: /^(operator|operator:\w+|network:\w+|\w+_name|\w+_name:\w+)$/i
    };
  } else if (tags.man_made === 'flagpole') {
    patterns = {
      primary: /^(flag:name|flag:name:\w+)$/i,
      alternate: /^(flag|flag:\w+|subject|subject:\w+)$/i  // note: no `country`, we special-case it below
    };
  } else {
    patterns = {
      primary: /^(name|name:\w+)$/i,
      // NOTE(review): unlike the other patterns this one has no trailing `$`,
      // so it matches keys by prefix - confirm that this is intended.
      alternate: /^(brand|brand:\w+|operator|operator:\w+|\w+_name|\w+_name:\w+)/i,
    };
  }

  // A plain loop is used here (not `forEach`) so that `return empty` really leaves
  // `gatherNames()`. Inside a `forEach` callback it would only skip the current tag
  // and the remaining names would still be gathered and matched.
  for (const osmkey of Object.keys(tags)) {
    const osmvalue = tags[osmkey];
    if (!osmvalue) continue;

    if (isNamelike(osmkey, 'primary')) {
      if (/;/.test(osmvalue)) return empty;   // bail out if any namelike value contains a semicolon
      primary.add(osmvalue);
    } else if (isNamelike(osmkey, 'alternate')) {
      if (/;/.test(osmvalue)) return empty;   // bail out if any namelike value contains a semicolon
      alternate.add(osmvalue);
    }
  }

  // For flags only, fallback to `country` tag only if no other namelike values were found.
  // See https://github.com/openstreetmap/iD/pull/8305#issuecomment-769174070
  if (tags.man_made === 'flagpole' && !primary.size && !alternate.size && !!tags.country) {
    const osmvalue = tags.country;
    if (/;/.test(osmvalue)) return empty;   // bail out if any namelike value contains a semicolon
    alternate.add(osmvalue);
  }

  return {
    primary: primary,
    alternate: alternate
  };


  // Does the key match the chosen pattern, without being one of the known exceptions?
  function isNamelike(osmkey, which) {
    return patterns[which].test(osmkey) && !notNames.test(osmkey);
  }
}
// `gatherTuples()`
//  Builds every (key, value, name) combination that should be tried against the matcher.
//  The result is ordered by priority: all combinations for the primary names come first,
//  and for each name the primary k/v pairs come before the alternate ones.
//
//  Arguments
//   `tryKVs`: `Object` containing primary and alternate k/v pairs to test
//   `tryNames`: `Object` containing primary and alternate names to test
//  Returns
//   `Array`: tuple objects `{ k, v, n }` ordered by priority
//
function gatherTuples(tryKVs, tryNames) {
  const priority = ['primary', 'alternate'];
  const tuples = [];

  for (const nameRank of priority) {
    for (const n of tryNames[nameRank]) {
      for (const kvRank of priority) {
        for (const kv of tryKVs[kvRank]) {
          const [k, v] = kv.split('/', 2);   // kv = "key/value"
          tuples.push({ k, v, n });
        }
      }
    }
  }

  return tuples;
}
// `_upgradeTags()`
//  Try to match a feature to a canonical record in name-suggestion-index
//  and work out the tags the feature would have after upgrading to that record.
//  The input `tags` are never modified - all changes are made on a shallow copy.
//
//  Arguments
//   `tags`: `Object` containing the feature's OSM tags
//   `loc`: Location where this feature exists, as a [lon, lat]
//  Returns
//   `Object`: The tags that the feature should have, or
//   `null` if no replacement was made and no NSI item could be matched
//
function _upgradeTags(tags, loc) {
  const newTags = Object.assign({}, tags);   // shallow copy
  let changed = false;

  // What to return whenever we give up on finding an NSI item
  const withoutMatch = () => (changed ? newTags : null);

  // Before anything, perform trivial Wikipedia/Wikidata replacements.
  // Only the '*:wikidata' tags are looked at, the sibling '*:wikipedia' tag follows along.
  for (const osmkey of Object.keys(newTags)) {
    const found = /^(\w+:)?wikidata$/.exec(osmkey);
    if (!found) continue;

    const replace = _nsi.replacements[newTags[osmkey]];   // is this QID in the replacement list?
    if (!replace) continue;

    if (replace.wikidata !== undefined) {    // replace or delete `*:wikidata` tag
      changed = true;
      if (replace.wikidata) {
        newTags[osmkey] = replace.wikidata;
      } else {
        delete newTags[osmkey];
      }
    }

    if (replace.wikipedia !== undefined) {   // replace or delete `*:wikipedia` tag
      changed = true;
      const wpkey = `${found[1] || ''}wikipedia`;
      if (replace.wikipedia) {
        newTags[wpkey] = replace.wikipedia;
      } else {
        delete newTags[wpkey];
      }
    }
  }

  // Gather key/value tag pairs to try to match
  const tryKVs = gatherKVs(tags);
  if (!(tryKVs.primary.size || tryKVs.alternate.size)) return withoutMatch();

  // Gather namelike tag values to try to match
  const tryNames = gatherNames(tags);

  // Do `wikidata=*` or `wikipedia=*` tags identify this entity as a chain? - See #6416
  // If so, these tags can be swapped to e.g. `brand:wikidata`/`brand:wikipedia`.
  const foundQID = _nsi.qids.get(tags.wikidata) || _nsi.qids.get(tags.wikipedia);
  if (foundQID) tryNames.primary.add(foundQID);   // matcher will recognize the Wikidata QID as name too

  if (!(tryNames.primary.size || tryNames.alternate.size)) return withoutMatch();

  // Test the [key,value,name] tuples in priority order - primary before alternate
  for (const tuple of gatherTuples(tryKVs, tryNames)) {
    const hits = _nsi.matcher.match(tuple.k, tuple.v, tuple.n, loc);   // Attempt to match an item in NSI
    if (!hits || !hits.length) continue;   // no match, try next tuple

    const matchType = hits[0].match;
    if (matchType !== 'primary' && matchType !== 'alternate') continue;   // a generic match, try next tuple

    const item = firstUsableItem(hits);
    if (!item) continue;   // can't use any of these hits, try next tuple

    // At this point we have matched a canonical item and can suggest tag upgrades..
    const category = _nsi.data[item.tkv];
    const properties = category.properties || {};

    // Some tags should survive the upgrade, NSI must not overwrite them. ('^name', sometimes)
    // The item's own list wins over the category's list, `building` and `takeaway` are always kept.
    const preserveTags = item.preserveTags || properties.preserveTags || [];
    const keepPatterns = preserveTags
      .map(s => new RegExp(s, 'i'))
      .concat([/^building$/i, /^takeaway$/i]);

    const keepTags = {};
    for (const osmkey of Object.keys(newTags)) {
      if (keepPatterns.some(pattern => pattern.test(osmkey))) {
        keepTags[osmkey] = newTags[osmkey];
      }
    }

    // Remove any primary tags ("amenity", "craft", "shop", "man_made", "route", etc)
    for (const primaryKey of _nsi.kvt.keys()) {
      delete newTags[primaryKey];
    }

    // Replace mistagged `wikidata`/`wikipedia` with e.g. `brand:wikidata`/`brand:wikipedia`
    if (foundQID) {
      delete newTags.wikipedia;
      delete newTags.wikidata;
    }

    // Later sources win: the canonical item tags first, then the preserved tags on top
    Object.assign(newTags, item.tags, keepTags);
    return newTags;
  }

  return withoutMatch();


  // A match may contain multiple results, the first one is likely the best one for this location
  // e.g. `['pfk-a54c14', 'kfc-1ff19c', 'kfc-658eea']`
  // Returns the first NSI item that is acceptable for this feature, or `null` if none is.
  function firstUsableItem(hits) {
    for (const hit of hits) {
      const candidate = _nsi.ids.get(hit.itemID);
      if (!candidate) continue;

      const mainTag = candidate.mainTag;            // e.g. `brand:wikidata`
      const itemQID = candidate.tags[mainTag];      // e.g. `brand:wikidata` qid
      const notQID = newTags[`not:${mainTag}`];     // e.g. `not:brand:wikidata` qid

      // Exceptions, skip this hit and continue looking
      const lacksUsableQID = !itemQID || itemQID === notQID;                  // no `*:wikidata` or matched a `not:*:wikidata`
      const maybeCorporateOffice = newTags.office && !candidate.tags.office;  // feature may be a corporate office for a brand? - #6416
      if (lacksUsableQID || maybeCorporateOffice) continue;

      return candidate;
    }
    return null;
  }
}
// `_isGenericName()`
//  Decides whether the feature's `name` tag is a generic name, by running it through
//  the NSI matcher together with the feature's interesting key/value pairs.
//  Only the `name` tag is tested, and the matcher is called without a location.
//
//  Arguments
//   `tags`: `Object` containing the feature's OSM tags
//  Returns
//   `true` if it is generic, `false` if not
//
function _isGenericName(tags) {
  const name = tags.name;
  if (!name) return false;

  // Gather key/value tag pairs to try to match
  const tryKVs = gatherKVs(tags);
  if (!(tryKVs.primary.size || tryKVs.alternate.size)) return false;

  // The only name to try is the `name` tag value itself
  const tryNames = { primary: new Set([name]), alternate: new Set() };

  // Test the [key,value,name] tuples in priority order.
  // The name is generic as soon as one tuple's best hit is an `excludeGeneric` hit.
  return gatherTuples(tryKVs, tryNames).some(tuple => {
    const hits = _nsi.matcher.match(tuple.k, tuple.v, tuple.n);   // Attempt to match an item in NSI
    return Boolean(hits && hits.length && hits[0].match === 'excludeGeneric');
  });
}
// PUBLIC INTERFACE
export default {

  // `init()`
  //  Starts preparing the name-suggestion-index in the background.
  //  Nothing is returned - use `status()` to find out how the loading went.
  //
  init() {
    // Promise that fulfills after `msec` milliseconds
    const delay = msec => new Promise(resolve => {
      window.setTimeout(resolve, msec);
    });

    setNsiSources();

    // Note: service.init is called immediately after the presetManager has started loading its data.
    // We expect to chain onto an unfulfilled promise here.
    presetManager.ensureLoaded()
      .then(() => loadNsiPresets())
      .then(() => delay(100))                              // wait briefly for locationSets to enter the locationManager queue
      .then(() => locationManager.mergeLocationSets([]))   // wait for locationSets to resolve
      .then(() => loadNsiData())
      .then(() => { _nsiStatus = 'ok'; })
      .catch(() => { _nsiStatus = 'failed'; });            // any failure along the chain ends up here
  },


  // `reset()`
  //  Reset is called when user saves data to OSM (does nothing here)
  //
  reset() {},


  // `status()`
  //  To let other code know how it's going...
  //
  //  Returns
  //   `String`: 'loading', 'ok', 'failed'
  //
  status() {
    return _nsiStatus;
  },


  // `isGenericName()`
  //  Is the `name` tag generic?
  //
  //  Arguments
  //   `tags`: `Object` containing the feature's OSM tags
  //  Returns
  //   `true` if it is generic, `false` if not
  //
  isGenericName(tags) {
    return _isGenericName(tags);
  },


  // `upgradeTags()`
  //  Suggest tag upgrades.
  //  This function will not modify the input tags, it makes a copy.
  //
  //  Arguments
  //   `tags`: `Object` containing the feature's OSM tags
  //   `loc`: Location where this feature exists, as a [lon, lat]
  //  Returns
  //   `Object`: The tags that the feature should have, or `null` if no change
  //
  upgradeTags(tags, loc) {
    return _upgradeTags(tags, loc);
  },


  // `cache()`
  //  Direct access to the NSI cache, useful for testing or breaking things
  //
  //  Returns
  //   `Object`: the internal NSI cache
  //
  cache() {
    return _nsi;
  }
};
+14 -295
View File
@@ -1,22 +1,20 @@
import { t } from '../core/localizer';
import { matcher as Matcher } from 'name-suggestion-index';
import { fileFetcher, locationManager } from '../core';
import { actionChangePreset } from '../actions/change_preset';
import { actionChangeTags } from '../actions/change_tags';
import { actionUpgradeTags } from '../actions/upgrade_tags';
import { fileFetcher } from '../core';
import { presetManager } from '../presets';
import { services } from '../services';
import { osmIsOldMultipolygonOuterMember, osmOldMultipolygonOuterMemberOfRelation } from '../osm/multipolygon';
import { utilArrayUniq, utilDisplayLabel, utilHashcode, utilTagDiff } from '../util';
import { utilDisplayLabel, utilHashcode, utilTagDiff } from '../util';
import { validationIssue, validationIssueFix } from '../core/validation';
export function validationOutdatedTags() {
const type = 'outdated_tags';
let _waitingForDeprecated = true;
let _waitingForNSI = true;
let _dataDeprecated;
let _nsi;
// fetch deprecated tags
fileFetcher.get('deprecated')
@@ -25,164 +23,6 @@ export function validationOutdatedTags() {
.finally(() => _waitingForDeprecated = false);
function delay(msec) {
return new Promise(resolve => {
window.setTimeout(resolve, msec);
});
}
// This Promise will fulfill after NSI presets are loaded and locations merged into the locationManager.
function waitForNSIPresets() {
return Promise.all([
fileFetcher.get('nsi_presets'),
fileFetcher.get('nsi_features')
])
.then(() => delay(1000)) // wait 1 sec for locationSets to enter the locationManager queue
.then(() => locationManager.mergeLocationSets([]) );
}
// Fetch the name-suggestion-index data
waitForNSIPresets()
.then(() => Promise.all([
fileFetcher.get('nsi_data'),
fileFetcher.get('nsi_replacements'),
fileFetcher.get('nsi_trees')
]))
.then(vals => {
if (_nsi) return _nsi;
_nsi = {
data: vals[0].nsi, // the raw name-suggestion-index data
replacements: vals[1].replacements, // trivial old->new qid replacements
trees: vals[2].trees, // metadata about trees, main tags
keys: new Set(), // primary osm keys to check for a NSI match
qids: new Map(), // Map wd/wp tag values -> qids
ids: new Map() // Map id -> NSI item
};
_nsi.matcher = Matcher();
_nsi.matcher.buildMatchIndex(_nsi.data);
_nsi.matcher.buildLocationIndex(_nsi.data, locationManager.loco());
Object.keys(_nsi.data).forEach(tkv => {
const parts = tkv.split('/', 3); // tkv = "tree/key/value"
const t = parts[0];
const k = parts[1];
// Collect primary keys (e.g. "amenity", "craft", "shop", "man_made", "route", etc)
_nsi.keys.add(k);
const tree = _nsi.trees[t]; // e.g. "brands", "operators"
const mainTag = tree.mainTag; // e.g. "brand:wikidata", "operator:wikidata", etc
const items = _nsi.data[tkv] || [];
items.forEach(item => {
// Cache NSI ids and main tags
item.mainTag = mainTag;
_nsi.ids.set(item.id, item);
// Cache Wikidata/Wikipedia values, for #6416
const wd = item.tags[mainTag];
const wp = item.tags[mainTag.replace('wikidata', 'wikipedia')];
if (wd) _nsi.qids.set(wd, wd);
if (wp && wd) _nsi.qids.set(wp, wd);
});
});
_nsi.keys.add('building'); // fallback can match building=* for some categories
return _nsi;
})
.catch(() => { /* ignore */ })
.finally(() => _waitingForNSI = false);
// Patterns for matching OSM keys that might contain namelike values.
// These roughly correspond to the "trees" concept in name-suggestion-index,
// but they can't be trees because there is overlap between different trees
// (e.g. 'amenity/yes' could match something from the "brands" tree or the "operators" tree)
const namePatterns = {
transit: {
primary: [
/^network$/i
],
alternate: [
/^(operator|network)(:\\w+)?$/i, // `network:guid`, `network:short`, etc.
/^(\\w+)_name(:\\w+)?$/i
]
},
flags: {
primary: [
/^flag:name(:\\w+)?$/i // `flag:name`, poss. w/ lang suffix
],
alternate: [
/^(flag|subject)(:\\w+)?$/i // note: no `country`, we special-case it in gatherNames
]
},
pois: {
primary: [
/^name(:\\w+)?$/i // `name`, poss. w/ lang suffix
],
alternate: [
/^(brand|operator)(:\\w+)?$/i, // `brand` or `operator`, poss. w/ lang suffix
/^(\\w+)_name(:\\w+)?$/i // `alt_name`, `short_name`, `official_name`, poss. w/ lang suffix
]
},
};
// There are a few exceptions to the namelike regexes.
// Usually a tag suffix contains a language code like `name:en`, `name:ru`
// but we want to exclude things like `operator:type`, `name:etymology`, etc..
const notNames = /:(colour|type|left|right|etymology|pronunciation|wikipedia)$/i;
// Gather all the namelike values that we will run through the NSI matcher
function gatherNames(tags) {
let names = [];
let patterns;
if (tags.route) {
patterns = namePatterns.transit;
} else if (tags.man_made === 'flagpole') {
patterns = namePatterns.flags;
} else {
patterns = namePatterns.pois;
}
let osmkeys = Object.keys(tags);
for (let j = 0; j < osmkeys.length; j++) {
const k = osmkeys[j];
const v = tags[k];
if (!v) continue;
if (isNamelike(k, 'primary')) {
if (/;/.test(v)) return []; // bail out if any namelike value contains a semicolon
names.unshift(v); // primary names at the beginning of the list
}
else if (isNamelike(k, 'alternate')) {
if (/;/.test(v)) return []; // bail out if any namelike value contains a semicolon
names.push(v); // alternate names at the end of the list
}
}
names = utilArrayUniq(names);
// For flags only, fallback to `country` tag only if no other namelike values were found.
// See https://github.com/openstreetmap/iD/pull/8305#issuecomment-769174070
if (tags.man_made === 'flagpole' && !names.length && !!tags.country) {
const v = tags.country;
if (/;/.test(v)) return []; // bail out if any namelike value contains a semicolon
names = [v];
}
return names;
function isNamelike(osmkey, which) {
return patterns[which].some(regex => regex.test(osmkey) && !notNames.test(osmkey));
}
}
function oldTagIssues(entity, graph) {
const oldTags = Object.assign({}, entity.tags); // shallow copy
let preset = presetManager.match(entity, graph);
@@ -224,143 +64,22 @@ export function validationOutdatedTags() {
}
// Attempt to match a canonical record in the name-suggestion-index.
// This index contains the most correct tagging for many commonly mapped features.
// See https://github.com/osmlab/name-suggestion-index and https://nsi.guide
if (_nsi) {
// Perform trivial Wikipedia/Wikidata replacements
Object.keys(newTags).forEach(osmkey => {
const matchTag = osmkey.match(/^(\w+:)?wikidata$/);
if (matchTag) { // Look at '*:wikidata' tags
const prefix = (matchTag[1] || '');
const wd = newTags[osmkey];
const replace = _nsi.replacements[wd]; // If it matches a QID in the replacement list...
if (replace && replace.wikidata !== undefined) { // replace or delete `*:wikidata` tag
if (replace.wikidata) {
newTags[osmkey] = replace.wikidata;
} else {
delete newTags[osmkey];
}
}
if (replace && replace.wikipedia !== undefined) { // replace or delete `*:wikipedia` tag
const wpkey = `${prefix}wikipedia`;
if (replace.wikipedia) {
newTags[wpkey] = replace.wikipedia;
} else {
delete newTags[wpkey];
}
}
}
});
// Do `wikidata` or `wikipedia` tags identify this entity as a chain? #6416
// If so, these tags can be swapped to e.g. `brand:wikidata`/`brand:wikipedia` below.
let foundQID = _nsi.qids.get(newTags.wikidata) || _nsi.qids.get(newTags.wikipedia);
// We will only spend time to compute these things if it's necessary
let names, loc, match;
// Try each primary key ("amenity", "craft", "shop", "man_made", "route", etc)
const nsiKeys = Array.from(_nsi.keys);
for (let i = 0; i < nsiKeys.length; i++) {
if (match) break; // matched already, stop looking
let k = nsiKeys[i];
let v = newTags[k];
if (!v) continue;
// Only attempt a match on building/yes if there is nothing else remarkable about that building.
if (k === 'building') {
v = 'yes';
if (preset.id !== 'building/yes') continue; // the feature matched a better preset
}
if (!loc) { // collect location for this feature only once
loc = entity.extent(graph).center();
}
if (!names) { // collect names for this feature only once
names = gatherNames(newTags);
if (foundQID) names.push(foundQID); // matcher will recognize the QID as an alternate name too
}
// Try each namelike value
for (let n = 0; n < names.length; n++) {
match = _nsi.matcher.match(k, v, names[n], loc); // Attempt to match an item in NSI
if (!match) continue; // keep looking
// If we get here, there was a match..
// A match may contain multiple results, the first one is the best one for this location
// e.g. `['pfk-a54c14', 'kfc-1ff19c', 'kfc-658eea']`
const itemID = match[0].itemID;
const item = _nsi.ids.get(itemID);
const mainTag = item.mainTag; // e.g. `brand:wikidata`
const itemQID = item.tags[mainTag]; // e.g. `brand:wikidata` qid
const notQID = newTags[`not:${mainTag}`]; // e.g. `not:brand:wikidata` qid
// Exceptions, throw out the match
if (
(!itemQID || itemQID === notQID) || // no `*:wikidata` or matched a `not:*:wikidata`
(newTags.office && !item.tags.office) // feature may be a corporate office for a brand? - #6416
) {
match = null; // forget match and keep looking
continue; // (it might make sense to stop looking, not sure)
}
// We are keeping the match at this point
const nsi = services.nsi;
let waitingForNsi = false;
if (nsi) {
waitingForNsi = (nsi.status() === 'loading');
if (!waitingForNsi) {
const loc = entity.extent(graph).center();
const result = nsi.upgradeTags(newTags, loc);
if (result) {
newTags = result;
subtype = 'noncanonical_brand';
// Keys that we don't want NSI to overwrite.
let keepKeys = [/^building$/i, /^flag:name$/i, /^takeaway$/i];
// Don't overwrite a `name` tag if this preset shows a `brand` or `operator` field.
// (For presets like hotels, car dealerships, post offices, the `name` should be left alone)
// see also similar logic in `localized.js`
const nsiPreset = presetManager.matchTags(item.tags, 'point'); // (the actual geometry doesn't matter)
if (nsiPreset) {
const fields = nsiPreset.fields();
const showsBrandField = fields.some(d => d.id === 'brand');
const showsOperatorField = fields.some(d => d.id === 'operator');
const setsName = item.tags.name;
const setsBrandWikidata = item.tags['brand:wikidata'];
const setsOperatorWikidata = item.tags['operator:wikidata'];
if (setsName && (
(setsBrandWikidata && showsBrandField) ||
(setsOperatorWikidata && showsOperatorField)
)) {
keepKeys.push(/^name(:\w+)?$/i); // `name`, `name:en`, etc
}
}
// Preserve some tag values that we don't want NSI to overwrite.
let keepTags = {};
Object.keys(newTags).forEach(k => {
if (keepKeys.some(pattern => pattern.test(k))) {
keepTags[k] = newTags[k];
}
});
// Replace the primary tags with what's in NSI ("amenity", "craft", "shop", "man_made", "route", etc)
nsiKeys.forEach(k => delete newTags[k]);
// Replace `wikidata`/`wikipedia` with e.g. `brand:wikidata`/`brand:wikipedia`
if (foundQID) {
delete newTags.wikipedia;
delete newTags.wikidata;
}
Object.assign(newTags, item.tags, keepTags);
break; // stop looking
}
}
// maybe someday: match features without the location to determine
// if a feature appears somewhere in the world that it shouldn't.
} // end if _nsi
}
let issues = [];
issues.provisional = (_waitingForDeprecated || _waitingForNSI);
issues.provisional = (_waitingForDeprecated || waitingForNsi);
// determine diff
const tagDiff = utilTagDiff(oldTags, newTags);
+19 -23
View File
@@ -1,8 +1,8 @@
import { fileFetcher } from '../core/file_fetcher';
import { t, localizer } from '../core/localizer';
import { presetManager } from '../presets';
import { validationIssue, validationIssueFix } from '../core/validation';
import { actionChangeTags } from '../actions/change_tags';
import { presetManager } from '../presets';
import { services } from '../services';
import { t, localizer } from '../core/localizer';
import { validationIssue, validationIssueFix } from '../core/validation';
export function validationSuspiciousName() {
@@ -11,27 +11,23 @@ export function validationSuspiciousName() {
'aerialway', 'aeroway', 'amenity', 'building', 'craft', 'highway',
'leisure', 'railway', 'man_made', 'office', 'shop', 'tourism', 'waterway'
];
let _dataGenerics;
let _waitingForGenerics = true;
fileFetcher.get('nsi_generics')
.then(data => {
if (_dataGenerics) return _dataGenerics;
// known list of generic names (e.g. "bar")
_dataGenerics = data.genericWords.map(pattern => new RegExp(pattern, 'i'));
return _dataGenerics;
})
.catch(() => { /* ignore */ })
.finally(() => _waitingForGenerics = false);
let _waitingForNsi = false;
function isDiscardedSuggestionName(lowercaseName) {
if (!_dataGenerics) return false;
return _dataGenerics.some(regex => regex.test(lowercaseName));
// Ask the name-suggestion-index (NSI) service whether `tags` match one of its
// generic-name records.
//
// Side effect: whenever the service is present, `_waitingForNsi` is refreshed to
// reflect whether the service currently reports the 'loading' status. When the
// service is absent the flag is left untouched.
//
// Returns `false` if the service is missing or still loading; otherwise returns
// whatever `nsi.isGenericName(tags)` returns.
//
// NOTE(review): while NSI is loading this answers `false`, so a caller that only
// looks at `_waitingForNsi` after a positive match may not see it as `true` for
// that entity - confirm at the call site.
function isGenericMatchInNsi(tags) {
  const nsi = services.nsi;
  if (!nsi) return false;                  // service not registered

  _waitingForNsi = (nsi.status() === 'loading');
  if (_waitingForNsi) return false;        // no answer available yet

  return nsi.isGenericName(tags);
}
// test if the name is just the key or tag value (e.g. "park")
// Test if the name is just the key or tag value (e.g. "park")
function nameMatchesRawTag(lowercaseName, tags) {
for (let i = 0; i < keysToTestForGenericValues.length; i++) {
let key = keysToTestForGenericValues[i];
@@ -51,7 +47,7 @@ export function validationSuspiciousName() {
function isGenericName(name, tags) {
name = name.toLowerCase();
return nameMatchesRawTag(name, tags) || isDiscardedSuggestionName(name);
return nameMatchesRawTag(name, tags) || isGenericMatchInNsi(tags);
}
function makeGenericNameIssue(entityId, nameKey, genericName, langCode) {
@@ -173,7 +169,7 @@ export function validationSuspiciousName() {
}
}
if (isGenericName(value, tags)) {
issues.provisional = _waitingForGenerics; // retry later if we don't have the generics yet
issues.provisional = _waitingForNsi; // retry later if we are waiting on NSI to finish loading
issues.push(makeGenericNameIssue(entity.id, key, value, langCode));
}
}
-6
View File
@@ -20,12 +20,6 @@ iD.fileFetcher.cache().preset_presets = {};
// Initializing `coreContext` initializes `_validator`, which tries loading:
iD.fileFetcher.cache().deprecated = [];
iD.fileFetcher.cache().nsi_presets = { presets: {} };
iD.fileFetcher.cache().nsi_data = { nsi: {} };
iD.fileFetcher.cache().nsi_features = { type: 'FeatureCollection', features: [] };
iD.fileFetcher.cache().nsi_generics = { genericWords: [] };
iD.fileFetcher.cache().nsi_replacements = { replacements: {} };
iD.fileFetcher.cache().nsi_trees = { trees: {} };
// Initializing `coreContext` initializes `_uploader`, which tries loading:
iD.fileFetcher.cache().discarded = {};
+56 -4
View File
@@ -2,11 +2,38 @@ describe('iD.validations.suspicious_name', function () {
var context;
before(function() {
iD.fileFetcher.cache().nsi_generics = { genericWords: ['^stores?$'] };
iD.services.nsi = iD.serviceNsi;
iD.fileFetcher.cache().nsi_presets = { presets: {} };
iD.fileFetcher.cache().nsi_features = { type: 'FeatureCollection', features: [] };
iD.fileFetcher.cache().nsi_replacements = { replacements: {} };
iD.fileFetcher.cache().nsi_trees = {
trees: {
brands: {
mainTag: 'brand:wikidata'
}
}
};
iD.fileFetcher.cache().nsi_data = {
nsi: {
'brands/shop/supermarket': {
properties: {
path: 'brands/shop/supermarket',
exclude: {
generic: ['^(mini|super)?\\s?(market|mart|mercado)( municipal)?$' ],
named: ['^(famiglia cooperativa|семейный)$']
}
}
}
}
};
iD.fileFetcher.cache().nsi_generics = {
genericWords: ['^stores?$']
};
});
after(function() {
iD.fileFetcher.cache().nsi_generics = { genericWords: [] };
delete iD.services.nsi;
});
beforeEach(function() {
@@ -86,8 +113,33 @@ describe('iD.validations.suspicious_name', function () {
}, 20);
});
it('flags feature with a known generic name', function(done) {
createWay({ shop: 'supermarket', name: 'Store' });
// A name listed under the category's `exclude.named` patterns must not be
// reported as a generic name.
it('ignores feature matching excludeNamed pattern in name-suggestion-index', function(done) {
    var wayTags = { shop: 'supermarket', name: 'famiglia cooperativa' };
    createWay(wayTags);
    var validator = iD.validationSuspiciousName(context);

    // Runs after a short delay: async, so data will be available
    function checkNoIssues() {
        expect(validate(validator)).to.have.lengthOf(0);
        done();
    }

    window.setTimeout(checkNoIssues, 20);
});
// A name matching one of the category's `exclude.generic` patterns must be
// reported as a single `suspicious_name` / `generic_name` issue on the way.
it('flags feature matching a excludeGeneric pattern in name-suggestion-index', function(done) {
    var wayTags = { shop: 'supermarket', name: 'super mercado' };
    createWay(wayTags);
    var validator = iD.validationSuspiciousName(context);

    // Runs after a short delay: async, so data will be available
    function checkSingleGenericIssue() {
        var found = validate(validator);
        expect(found).to.have.lengthOf(1);

        var first = found[0];
        expect(first.type).to.eql('suspicious_name');
        expect(first.subtype).to.eql('generic_name');
        expect(first.entityIds).to.have.lengthOf(1);
        expect(first.entityIds[0]).to.eql('w-1');
        done();
    }

    window.setTimeout(checkSingleGenericIssue, 20);
});
it('flags feature matching a global exclude pattern in name-suggestion-index', function(done) {
createWay({ shop: 'supermarket', name: 'store' });
var validator = iD.validationSuspiciousName(context);
window.setTimeout(function() { // async, so data will be available
var issues = validate(validator);