mirror of
https://github.com/FoggedLens/iD.git
synced 2026-05-16 13:59:27 +02:00
Upgrade validations outdated_tags, suspicious_names to NSI v5
This commit is contained in:
+4
-4
@@ -1767,9 +1767,9 @@ en:
|
||||
message: "{feature} has incomplete tags"
|
||||
reference: "Some features should have additional tags."
|
||||
noncanonical_brand:
|
||||
message: "{feature} looks like a brand with nonstandard tags"
|
||||
message_incomplete: "{feature} looks like a brand with incomplete tags"
|
||||
reference: "All features of the same brand should be tagged the same way."
|
||||
message: "{feature} looks like a common feature with nonstandard tags"
|
||||
message_incomplete: "{feature} looks like a common feature with incomplete tags"
|
||||
reference: "Some features, for example retail chains or post offices, are expected to have certain tags in common."
|
||||
point_as_area:
|
||||
message: '{feature} should be a point, not an area'
|
||||
point_as_line:
|
||||
@@ -2332,4 +2332,4 @@ en:
|
||||
wikidata:
|
||||
identifier: "Identifier"
|
||||
label: "Label"
|
||||
description: "Description"
|
||||
description: "Description"
|
||||
Vendored
+1
-1
File diff suppressed because one or more lines are too long
@@ -20,11 +20,6 @@ export function coreFileFetcher() {
|
||||
'languages': 'data/languages.min.json',
|
||||
'locales': 'locales/index.min.json',
|
||||
|
||||
// old
|
||||
'nsi_brands': 'https://cdn.jsdelivr.net/npm/name-suggestion-index@4/dist/brands.min.json',
|
||||
'nsi_filters': 'https://cdn.jsdelivr.net/npm/name-suggestion-index@4/dist/filters.min.json',
|
||||
|
||||
// new
|
||||
'nsi_presets': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/presets/nsi-id-presets.min.json',
|
||||
'nsi_data': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/nsi.min.json',
|
||||
'nsi_features': 'https://raw.githubusercontent.com/osmlab/name-suggestion-index/main/dist/featureCollection.min.json',
|
||||
|
||||
@@ -1,15 +1,13 @@
|
||||
import { t } from '../core/localizer';
|
||||
import * as countryCoder from '@ideditor/country-coder';
|
||||
import LocationConflation from '@ideditor/location-conflation';
|
||||
import { matcher as Matcher } from 'name-suggestion-index';
|
||||
|
||||
import { presetManager } from '../presets';
|
||||
import { fileFetcher } from '../core/file_fetcher';
|
||||
import { fileFetcher, locationManager } from '../core';
|
||||
import { actionChangePreset } from '../actions/change_preset';
|
||||
import { actionChangeTags } from '../actions/change_tags';
|
||||
import { actionUpgradeTags } from '../actions/upgrade_tags';
|
||||
import { presetManager } from '../presets';
|
||||
import { osmIsOldMultipolygonOuterMember, osmOldMultipolygonOuterMemberOfRelation } from '../osm/multipolygon';
|
||||
import { utilDisplayLabel, utilTagDiff } from '../util';
|
||||
import { utilArrayUniq, utilDisplayLabel, utilTagDiff } from '../util';
|
||||
import { validationIssue, validationIssueFix } from '../core/validation';
|
||||
|
||||
|
||||
@@ -18,7 +16,6 @@ let _nsi;
|
||||
|
||||
export function validationOutdatedTags() {
|
||||
const type = 'outdated_tags';
|
||||
const nsiKeys = ['amenity', 'shop', 'tourism', 'leisure', 'office'];
|
||||
|
||||
// A concern here in switching to async data means that `_dataDeprecated`
|
||||
// and `_nsi` will not be available at first, so the data on early tiles
|
||||
@@ -30,49 +27,98 @@ export function validationOutdatedTags() {
|
||||
.catch(() => { /* ignore */ });
|
||||
|
||||
|
||||
// console.log('NSI: start fetching..');
|
||||
// // fetch the name-suggestion-index data
|
||||
// Promise.all([
|
||||
// fileFetcher.get('nsi_data'),
|
||||
// fileFetcher.get('nsi_features'),
|
||||
// fileFetcher.get('nsi_generics'),
|
||||
// fileFetcher.get('nsi_replacements'),
|
||||
// fileFetcher.get('nsi_trees')
|
||||
// ])
|
||||
// .then(vals => {
|
||||
// _nsi = {
|
||||
// data: vals[0].nsi,
|
||||
// features: vals[1],
|
||||
// generics: vals[2].genericWords,
|
||||
// replacements: vals[3].replacements,
|
||||
// trees: vals[4].trees
|
||||
// };
|
||||
function delay(msec) {
    // Hands back a Promise that fulfills (with no value) once the
    // `window.setTimeout` timer of `msec` milliseconds has fired.
    return new Promise(function(done) {
        window.setTimeout(done, msec);
    });
}
|
||||
|
||||
// console.log('NSI: done fetching..');
|
||||
// console.log('NSI: start indexing..');
|
||||
// This Promise will fulfill after NSI presets are loaded and locations merged into the locationManager.
|
||||
function waitForNSIPresets() {
    // Request both preset-related files up front; nothing below uses their
    // contents directly, only the fact that they have finished loading.
    const pendingFiles = ['nsi_presets', 'nsi_features'].map(id => fileFetcher.get(id));

    return Promise.all(pendingFiles)
        // wait 1 sec for locationSets to enter the locationManager queue
        .then(() => delay(1000))
        // The returned chain settles with whatever `mergeLocationSets([])` yields.
        .then(() => locationManager.mergeLocationSets([]));
}
|
||||
|
||||
// _nsi.loco = new LocationConflation(_nsi.features);
|
||||
// _nsi.matcher = Matcher();
|
||||
// _nsi.matcher.buildMatchIndex(_nsi.data);
|
||||
// _nsi.matcher.buildLocationIndex(_nsi.data, _nsi.loco);
|
||||
// Fetch the name-suggestion-index data
|
||||
waitForNSIPresets()
    .then(() => Promise.all([
        fileFetcher.get('nsi_data'),
        fileFetcher.get('nsi_replacements'),
        fileFetcher.get('nsi_trees')
    ]))
    .then(vals => {
        if (_nsi) return _nsi;   // already built, nothing to do

        // Build everything into a local object first and only publish it to `_nsi`
        // once every index below has been built. Errors thrown in here are swallowed
        // by the `.catch` at the end of this chain, so assigning `_nsi` up front would
        // leave a half-initialized cache (e.g. no `matcher`) that still passes the
        // `if (_nsi)` check used by the validation code.
        const nsi = {
            data:          vals[0].nsi,            // the raw name-suggestion-index data
            replacements:  vals[1].replacements,   // trivial old->new qid replacements
            trees:         vals[2].trees,          // metadata about trees, main tags
            keys:          new Set(),              // primary osm keys to check for a NSI match
            qids:          new Map(),              // Map wd/wp tag values -> qids
            ids:           new Map()               // Map id -> NSI item
        };

        // Initialize the name-suggestion-index matcher
        nsi.matcher = Matcher();
        nsi.matcher.buildMatchIndex(nsi.data);
        nsi.matcher.buildLocationIndex(nsi.data, locationManager.loco());

        Object.keys(nsi.data).forEach(tkv => {
            const parts = tkv.split('/', 3);     // tkv = "tree/key/value"
            const treeID = parts[0];             // named `treeID` so it does not shadow the imported localizer `t`
            const k = parts[1];

            // Collect primary keys (e.g. "amenity", "craft", "shop", "man_made", "route", etc)
            nsi.keys.add(k);

            const tree = nsi.trees[treeID];      // e.g. "brands", "operators"
            const mainTag = tree.mainTag;        // e.g. "brand:wikidata", "operator:wikidata", etc

            const items = nsi.data[tkv] || [];
            items.forEach(item => {
                // Cache NSI ids and main tags
                item.mainTag = mainTag;
                nsi.ids.set(item.id, item);

                // Cache Wikidata/Wikipedia values, for #6416
                const wd = item.tags[mainTag];
                const wp = item.tags[mainTag.replace('wikidata', 'wikipedia')];
                if (wd)         nsi.qids.set(wd, wd);
                if (wp && wd)   nsi.qids.set(wp, wd);
            });
        });

        nsi.keys.add('building');   // fallback can match building=* for some categories

        _nsi = nsi;   // publish only after a complete, successful build
        return _nsi;
    })
    .catch(() => { /* ignore */ });
|
||||
|
||||
|
||||
// Returns true if this tag key is a "namelike" tag that the NSI matcher would have indexed..
|
||||
function isNamelike(k) {
    // `k` is an OSM tag key such as `name`, `alt_name:en` or `operator:type`.
    // The result is true when `k` matches one of the namelike patterns below
    // and does not end in one of the excluded suffixes.

    // There are a few exceptions to the namelike regexes.
    // Usually a tag suffix contains a language code like `name:en`, `name:ru`
    // but we want to exclude things like `operator:type`, `name:etymology`, etc..
    // The exclusion does not depend on which pattern matched, so it is checked
    // once here rather than again for every pattern.
    if (/:(colour|type|left|right|etymology|wikipedia)$/.test(k)) return false;

    const namePatterns = [
        /^(flag:)?name$/i,                                             // e.g. `name`, `flag:name`
        /^(brand|country|flag|operator|network|subject)$/i,
        /^\w+_name$/i,                                                 // e.g. `alt_name`, `short_name`
        /^(name|brand|country|flag|operator|network|subject):\w+$/i,   // e.g. `name:en`, `name:ru`
        /^\w+_name:\w+$/i                                              // e.g. `alt_name:en`, `short_name:ru`
    ];

    return namePatterns.some(pattern => pattern.test(k));
}
|
||||
|
||||
|
||||
function oldTagIssues(entity, graph) {
|
||||
@@ -81,7 +127,7 @@ export function validationOutdatedTags() {
|
||||
let subtype = 'deprecated_tags';
|
||||
if (!preset) return [];
|
||||
|
||||
// upgrade preset..
|
||||
// Upgrade preset, if a replacement is available..
|
||||
if (preset.replacement) {
|
||||
const newPreset = presetManager.item(preset.replacement);
|
||||
graph = actionChangePreset(entity.id, preset, newPreset, true /* skip field defaults */)(graph);
|
||||
@@ -89,7 +135,7 @@ export function validationOutdatedTags() {
|
||||
preset = newPreset;
|
||||
}
|
||||
|
||||
// upgrade tags..
|
||||
// Upgrade deprecated tags..
|
||||
if (_dataDeprecated) {
|
||||
const deprecatedTags = entity.deprecatedTags(_dataDeprecated);
|
||||
if (deprecatedTags.length) {
|
||||
@@ -100,7 +146,7 @@ export function validationOutdatedTags() {
|
||||
}
|
||||
}
|
||||
|
||||
// add missing addTags..
|
||||
// Add missing addTags from the detected preset
|
||||
let newTags = Object.assign({}, entity.tags); // shallow copy
|
||||
if (preset.tags !== preset.addTags) {
|
||||
Object.keys(preset.addTags).forEach(k => {
|
||||
@@ -114,63 +160,119 @@ export function validationOutdatedTags() {
|
||||
});
|
||||
}
|
||||
|
||||
// Attempt to match a canonical record in the name-suggestion-index.
|
||||
// This index contains the most correct tagging for many commonly mapped features.
|
||||
// See https://github.com/osmlab/name-suggestion-index and https://nsi.guide
|
||||
if (_nsi) {
|
||||
// Do `wikidata` or `wikipedia` identify this entity as a brand? #6416
|
||||
// If so, these tags can be swapped to `brand:wikidata`/`brand:wikipedia`
|
||||
let isBrand;
|
||||
if (newTags.wikidata) { // try matching `wikidata`
|
||||
isBrand = _nsi.wikidata[newTags.wikidata];
|
||||
}
|
||||
if (!isBrand && newTags.wikipedia) { // fallback to `wikipedia`
|
||||
isBrand = _nsi.wikipedia[newTags.wikipedia];
|
||||
}
|
||||
if (isBrand && !newTags.office) { // but avoid doing this for corporate offices
|
||||
if (newTags.wikidata) {
|
||||
newTags['brand:wikidata'] = newTags.wikidata;
|
||||
delete newTags.wikidata;
|
||||
|
||||
// Perform trivial Wikipedia/Wikidata replacements
|
||||
Object.keys(newTags).forEach(osmkey => {
|
||||
const matchTag = osmkey.match(/^(\w+:)?wikidata$/);
|
||||
if (matchTag) { // Look at '*:wikidata' tags
|
||||
const prefix = (matchTag[1] || '');
|
||||
const wd = newTags[osmkey];
|
||||
const replace = _nsi.replacements[wd]; // If it matches a QID in the replacement list...
|
||||
|
||||
if (replace && replace.wikidata !== undefined) { // replace or delete `*:wikidata` tag
|
||||
if (replace.wikidata) {
|
||||
newTags[osmkey] = replace.wikidata;
|
||||
} else {
|
||||
delete newTags[osmkey];
|
||||
}
|
||||
}
|
||||
if (replace && replace.wikipedia !== undefined) { // replace or delete `*:wikipedia` tag
|
||||
const wpkey = `${prefix}wikipedia`;
|
||||
if (replace.wikipedia) {
|
||||
newTags[wpkey] = replace.wikipedia;
|
||||
} else {
|
||||
delete newTags[wpkey];
|
||||
}
|
||||
}
|
||||
}
|
||||
if (newTags.wikipedia) {
|
||||
newTags['brand:wikipedia'] = newTags.wikipedia;
|
||||
delete newTags.wikipedia;
|
||||
});
|
||||
|
||||
// Do `wikidata` or `wikipedia` tags identify this entity as a chain? #6416
|
||||
// If so, these tags can be swapped to e.g. `brand:wikidata`/`brand:wikipedia` below.
|
||||
let foundQID = _nsi.qids.get(newTags.wikidata) || _nsi.qids.get(newTags.wikipedia);
|
||||
|
||||
// We will only spend time to compute these things if it's necessary
|
||||
let names, loc, match;
|
||||
|
||||
// Try each primary key ("amenity", "craft", "shop", "man_made", "route", etc)
|
||||
const nsiKeys = Array.from(_nsi.keys);
|
||||
for (let i = 0; i < nsiKeys.length; i++) {
|
||||
if (match) break; // matched already, stop looking
|
||||
let k = nsiKeys[i];
|
||||
let v = newTags[k];
|
||||
if (!v) continue;
|
||||
|
||||
// Only attempt a match on building/yes if there is nothing else remarkable about that building.
|
||||
if (k === 'building') {
|
||||
v = 'yes';
|
||||
if (preset.id !== 'building/yes') continue; // the feature matched a better preset
|
||||
}
|
||||
// I considered setting `name` and other tags here, but they aren't unique per wikidata
|
||||
// (Q2759586 -> in USA "Papa John's", in Russia "Папа Джонс")
|
||||
// So users will really need to use a preset or assign `name` themselves.
|
||||
}
|
||||
|
||||
// try key/value|name match against name-suggestion-index
|
||||
if (newTags.name) {
|
||||
for (let i = 0; i < nsiKeys.length; i++) {
|
||||
const k = nsiKeys[i];
|
||||
if (!newTags[k]) continue;
|
||||
if (!loc) { // collect location for this feature only once
|
||||
loc = entity.extent(graph).center();
|
||||
}
|
||||
if (!names) { // collect names for this feature only once
|
||||
names = Object.keys(newTags)
|
||||
.map(k => isNamelike(k) ? newTags[k] : null)
|
||||
.filter(Boolean);
|
||||
|
||||
const center = entity.extent(graph).center();
|
||||
const countryCode = countryCoder.iso1A2Code(center);
|
||||
const match = _nsi.matcher.matchKVN(k, newTags[k], newTags.name, countryCode && countryCode.toLowerCase());
|
||||
if (!match) continue;
|
||||
if (foundQID) names.unshift(foundQID); // matcher will recognize the QID as a name too
|
||||
names = utilArrayUniq(names);
|
||||
}
|
||||
|
||||
// for now skip ambiguous matches (like Target~(USA) vs Target~(Australia))
|
||||
if (match.d) continue;
|
||||
// Try each namelike value
|
||||
for (let n = 0; n < names.length; n++) {
|
||||
match = _nsi.matcher.match(k, v, names[n], loc); // Attempt to match an item in NSI
|
||||
if (!match) continue; // keep looking
|
||||
|
||||
const brand = _nsi.brands[match.kvnd];
|
||||
if (brand && brand.tags['brand:wikidata'] &&
|
||||
brand.tags['brand:wikidata'] !== entity.tags['not:brand:wikidata']) {
|
||||
subtype = 'noncanonical_brand';
|
||||
// If we get here, there was a match..
|
||||
// A match may contain multiple results, the first one is the best one for this location
|
||||
// e.g. `['pfk-a54c14', 'kfc-1ff19c', 'kfc-658eea']`
|
||||
const item = _nsi.ids.get(match[0]);
|
||||
const mainTag = item.mainTag; // e.g. `brand:wikidata`
|
||||
const itemQID = item.tags[mainTag]; // e.g. `brand:wikidata` qid
|
||||
const notQID = newTags[`not:${mainTag}`]; // e.g. `not:brand:wikidata` qid
|
||||
|
||||
const keepTags = ['takeaway'].reduce((acc, k) => {
|
||||
if (newTags[k]) {
|
||||
acc[k] = newTags[k];
|
||||
}
|
||||
// Exceptions, throw out the match
|
||||
if (
|
||||
(!itemQID || itemQID === notQID) || // no `*:wikidata` or matched a `not:*:wikidata`
|
||||
(newTags.office && !item.tags.office) // feature may be a corporate office for a brand? - #6416
|
||||
) {
|
||||
match = null; // forget match and keep looking
|
||||
continue; // (it might make sense to stop looking, not sure)
|
||||
}
|
||||
|
||||
// We are keeping the match at this point
|
||||
subtype = 'noncanonical_brand';
|
||||
|
||||
// Preserve some tags values that we don't want NSI to overwrite.
|
||||
const keepTags = ['takeaway', 'building']
|
||||
.reduce((acc, k) => {
|
||||
if (newTags[k]) acc[k] = newTags[k];
|
||||
return acc;
|
||||
}, {});
|
||||
|
||||
nsiKeys.forEach(k => delete newTags[k]);
|
||||
Object.assign(newTags, brand.tags, keepTags);
|
||||
break;
|
||||
// Replace the primary tags with what's in NSI ("amenity", "craft", "shop", "man_made", "route", etc)
|
||||
nsiKeys.forEach(k => delete newTags[k]);
|
||||
// Replace `wikidata`/`wikipedia` with e.g. `brand:wikidata`/`brand:wikipedia`
|
||||
if (foundQID) {
|
||||
delete newTags.wikipedia;
|
||||
delete newTags.wikidata;
|
||||
}
|
||||
|
||||
Object.assign(newTags, item.tags, keepTags);
|
||||
break; // stop looking
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// maybe someday: match features without the location to determine
|
||||
// if a feature appears somewhere in the world that it shouldn't.
|
||||
|
||||
} // end if _nsi
|
||||
|
||||
// determine diff
|
||||
const tagDiff = utilTagDiff(oldTags, newTags);
|
||||
|
||||
@@ -17,11 +17,10 @@ export function validationSuspiciousName() {
|
||||
// A concern here in switching to async data means that `_nsiFilters` will not
|
||||
// be available at first, so the data on early tiles may not have tags validated fully.
|
||||
|
||||
fileFetcher.get('nsi_filters')
|
||||
.then(filters => {
|
||||
fileFetcher.get('nsi_generics')
|
||||
.then(data => {
|
||||
// known list of generic names (e.g. "bar")
|
||||
_discardNameRegexes = filters.discardNames
|
||||
.map(discardName => new RegExp(discardName, 'i'));
|
||||
_discardNameRegexes = data.genericWords.map(pattern => new RegExp(pattern, 'i'));
|
||||
})
|
||||
.catch(() => { /* ignore */ });
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
describe('iD.coreLocations', function() {
|
||||
var locationManager, loco, wp;
|
||||
var locationManager, loco;
|
||||
|
||||
var colorado = {
|
||||
type: 'Feature',
|
||||
@@ -27,7 +27,6 @@ describe('iD.coreLocations', function() {
|
||||
// make a new one each time, so we aren't accidentally testing the "global" locationManager
|
||||
locationManager = iD.coreLocations();
|
||||
loco = locationManager.loco();
|
||||
wp = locationManager.wp();
|
||||
});
|
||||
|
||||
|
||||
|
||||
@@ -17,10 +17,16 @@ iD.fileFetcher.cache().preset_categories = {};
|
||||
iD.fileFetcher.cache().preset_defaults = {};
|
||||
iD.fileFetcher.cache().preset_fields = {};
|
||||
iD.fileFetcher.cache().preset_presets = {};
|
||||
|
||||
// Initializing `coreContext` initializes `_validator`, which tries loading:
|
||||
iD.fileFetcher.cache().deprecated = [];
|
||||
iD.fileFetcher.cache().nsi_brands = [];
|
||||
iD.fileFetcher.cache().nsi_filters = { discardNames: [] };
|
||||
iD.fileFetcher.cache().nsi_presets = { presets: {} };
|
||||
iD.fileFetcher.cache().nsi_data = { nsi: {} };
|
||||
iD.fileFetcher.cache().nsi_features = { type: 'FeatureCollection', features: [] };
|
||||
iD.fileFetcher.cache().nsi_generics = { genericWords: [] };
|
||||
iD.fileFetcher.cache().nsi_replacements = { replacements: {} };
|
||||
iD.fileFetcher.cache().nsi_trees = { trees: {} };
|
||||
|
||||
// Initializing `coreContext` initializes `_uploader`, which tries loading:
|
||||
iD.fileFetcher.cache().discarded = {};
|
||||
|
||||
|
||||
@@ -2,11 +2,11 @@ describe('iD.validations.suspicious_name', function () {
|
||||
var context;
|
||||
|
||||
before(function() {
|
||||
iD.fileFetcher.cache().nsi_filters = { discardNames: ['^stores?$'] };
|
||||
iD.fileFetcher.cache().nsi_generics = { genericWords: ['^stores?$'] };
|
||||
});
|
||||
|
||||
after(function() {
|
||||
iD.fileFetcher.cache().nsi_filters = { discardNames: [] };
|
||||
iD.fileFetcher.cache().nsi_generics = { genericWords: [] };
|
||||
});
|
||||
|
||||
beforeEach(function() {
|
||||
|
||||
Reference in New Issue
Block a user