From 0b3df36da07ba4503af672dcda0a7f9643e37637 Mon Sep 17 00:00:00 2001 From: Bryan Housel Date: Thu, 7 Jul 2016 23:54:00 -0400 Subject: [PATCH] Replace diacritics when doing fuzzy searches (closes #3159) --- modules/presets/collection.js | 49 +++++----- modules/presets/preset.js | 2 +- modules/util/util.js | 7 ++ package.json | 1 + test/spec/presets/collection.js | 157 +++++++++++++++++++++++++++----- 5 files changed, 170 insertions(+), 46 deletions(-) diff --git a/modules/presets/collection.js b/modules/presets/collection.js index 69966c20e..a053931d6 100644 --- a/modules/presets/collection.js +++ b/modules/presets/collection.js @@ -1,4 +1,5 @@ import { editDistance } from '../util/index'; + export function Collection(collection) { var maxSearchResults = 50, maxSuggestionResults = 10; @@ -22,6 +23,20 @@ export function Collection(collection) { search: function(value, geometry) { if (!value) return this; + function leading(a) { + var index = a.indexOf(value); + return index === 0 || a[index - 1] === ' '; + } + + function suggestionName(name) { + var nameArray = name.split(' - '); + if (nameArray.length > 1) { + name = nameArray.slice(0, nameArray.length - 1).join(' - '); + } + return name.toLowerCase(); + } + + value = value.toLowerCase(); var searchable = _.filter(collection, function(a) { @@ -31,10 +46,6 @@ export function Collection(collection) { return a.suggestion === true; }); - function leading(a) { - var index = a.indexOf(value); - return index === 0 || a[index - 1] === ' '; - } // matches value to preset.name var leading_name = _.filter(searchable, function(a) { @@ -57,10 +68,10 @@ export function Collection(collection) { // finds close matches to value in preset.name - var levenstein_name = searchable.map(function(a) { + var similar_name = searchable.map(function(a) { return { preset: a, - dist: editDistance(value, a.name().toLowerCase()) + dist: editDistance(value, a.name()) }; }).filter(function(a) { return a.dist + Math.min(value.length - 
a.preset.name().length, 0) < 3; @@ -71,20 +82,12 @@ export function Collection(collection) { }); // finds close matches to value in preset.terms - var leventstein_terms = _.filter(searchable, function(a) { + var similar_terms = _.filter(searchable, function(a) { return _.some(a.terms() || [], function(b) { return editDistance(value, b) + Math.min(value.length - b.length, 0) < 3; }); }); - function suggestionName(name) { - var nameArray = name.split(' - '); - if (nameArray.length > 1) { - name = nameArray.slice(0, nameArray.length-1).join(' - '); - } - return name.toLowerCase(); - } - var leading_suggestions = _.filter(suggestions, function(a) { return leading(suggestionName(a.name())); }).sort(function(a, b) { @@ -95,7 +98,7 @@ export function Collection(collection) { else return i; }); - var leven_suggestions = suggestions.map(function(a) { + var similar_suggestions = suggestions.map(function(a) { return { preset: a, dist: editDistance(value, suggestionName(a.name())) @@ -111,13 +114,13 @@ export function Collection(collection) { var other = presets.item(geometry); var results = leading_name.concat( - leading_terms, - leading_tag_values, - leading_suggestions.slice(0, maxSuggestionResults+5), - levenstein_name, - leventstein_terms, - leven_suggestions.slice(0, maxSuggestionResults) - ).slice(0, maxSearchResults-1); + leading_terms, + leading_tag_values, + leading_suggestions.slice(0, maxSuggestionResults + 5), + similar_name, + similar_terms, + similar_suggestions.slice(0, maxSuggestionResults) + ).slice(0, maxSearchResults - 1); return Collection(_.uniq( results.concat(other) diff --git a/modules/presets/preset.js b/modules/presets/preset.js index 89b4084f2..966881b54 100644 --- a/modules/presets/preset.js +++ b/modules/presets/preset.js @@ -35,7 +35,7 @@ export function Preset(id, preset, fields) { return t('presets.presets.' + id + '.' 
+ scope, options); }; - var name = preset.name; + var name = preset.name || ''; preset.name = function() { if (preset.suggestion) { id = id.split('/'); diff --git a/modules/util/util.js b/modules/util/util.js index db44f3627..d3cc57791 100644 --- a/modules/util/util.js +++ b/modules/util/util.js @@ -1,3 +1,5 @@ +import { remove as removeDiacritics } from 'diacritics'; + export function tagText(entity) { return d3.entries(entity.tags).map(function(e) { return e.key + '=' + e.value; @@ -114,7 +116,12 @@ export function getStyle(selector) { } } +// Calculates Levenshtein distance between two strings +// see: https://en.wikipedia.org/wiki/Levenshtein_distance +// first converts the strings to lowercase and replaces diacritic marks with ascii equivalents. export function editDistance(a, b) { + a = removeDiacritics(a.toLowerCase()); + b = removeDiacritics(b.toLowerCase()); if (a.length === 0) return b.length; if (b.length === 0) return a.length; var matrix = []; diff --git a/package.json b/package.json index 28ff594b8..0ec7aa1fd 100644 --- a/package.json +++ b/package.json @@ -22,6 +22,7 @@ ], "license": "ISC", "dependencies": { + "diacritics": "1.2.3", "marked": "0.3.5", "osm-auth": "0.2.8", "rbush": "2.0.1", diff --git a/test/spec/presets/collection.js b/test/spec/presets/collection.js index be48c0ecb..8df0d641b 100644 --- a/test/spec/presets/collection.js +++ b/test/spec/presets/collection.js @@ -1,60 +1,173 @@ +/* global locale: true */ +/* eslint no-console: 0 */ + describe('iD.presets.Collection', function() { var p = { point: iD.presets.Preset('point', { + name: 'Point', tags: {}, geometry: ['point'] }), + line: iD.presets.Preset('line', { + name: 'Line', + tags: {}, + geometry: ['line'] + }), area: iD.presets.Preset('area', { + name: 'Area', tags: {}, geometry: ['area'] }), - residential: iD.presets.Preset('highway/residential', { - tags: { - highway: 'residential' - }, - geometry: ['line'] + grill: iD.presets.Preset('__test/amenity/bbq', { + name: 'Grill', +
tags: { amenity: 'bbq' }, + geometry: ['point'], + terms: [] }), - park: iD.presets.Preset('leisure/park', { - tags: { - leisure: 'park' - }, - geometry: ['point', 'area'] + sandpit: iD.presets.Preset('__test/amenity/grit_bin', { + name: 'Sandpit', + tags: { amenity: 'grit_bin' }, + geometry: ['point'], + terms: [] + }), + residential: iD.presets.Preset('__test/highway/residential', { + name: 'Residential Area', + tags: { highway: 'residential' }, + geometry: ['point', 'area'], + terms: [] + }), + grass: iD.presets.Preset('__test/landuse/grass', { + name: 'Grass', + tags: { landuse: 'grass' }, + geometry: ['point', 'area'], + terms: [] + }), + park: iD.presets.Preset('__test/leisure/park', { + name: 'Park', + tags: { leisure: 'park' }, + geometry: ['point', 'area'], + terms: [ 'grass' ] + }), + soccer: iD.presets.Preset('__test/leisure/pitch/soccer', { + name: 'Soccer Field', + tags: { leisure: 'pitch', sport: 'soccer' }, + geometry: ['point', 'area'], + terms: ['fußball'] + }), + football: iD.presets.Preset('__test/leisure/pitch/american_football', { + name: 'Football Field', + tags: { leisure: 'pitch', sport: 'american_football' }, + geometry: ['point', 'area'], + terms: ['gridiron'] }) }; - var c = iD.presets.Collection([p.point, p.area, p.residential, p.park]); + + var c = iD.presets.Collection([ + p.point, p.line, p.area, p.grill, p.sandpit, + p.residential, p.grass, p.park, p.soccer, p.football + ]); + + var saved, error; + + // setup mock locale object.. 
+ beforeEach(function() { + saved = locale; + error = console.error; + console.error = function () {}; + locale = { + _current: 'en', + en: { + presets: { + presets: { + // fake locale names and terms for `preset.t()` + '__test/amenity/bbq': { + 'name': 'Grill', + 'terms': '' + }, + '__test/amenity/grit_bin': { + 'name': 'Sandpit', + 'terms': '' + }, + '__test/highway/residential': { + 'name': 'Residential Area', + 'terms': '' + }, + '__test/landuse/grass': { + 'name': 'Grass', + 'terms': '' + }, + '__test/leisure/park': { + 'name': 'Park', + 'terms': 'grass' + }, + '__test/leisure/pitch/soccer': { + 'name': 'Soccer Field', + 'terms': 'fußball' + }, + '__test/leisure/pitch/american_football': { + 'name': 'Football Field', + 'terms': 'gridiron' + } + } + } + } + }; + }); + + afterEach(function() { + locale = saved; + console.error = error; + }); + describe('#item', function() { it('fetches a preset by id', function() { - expect(c.item('highway/residential')).to.equal(p.residential); + expect(c.item('__test/highway/residential')).to.equal(p.residential); }); }); describe('#matchGeometry', function() { it('returns a new collection only containing presets matching a geometry', function() { - expect(c.matchGeometry('area').collection).to.eql([p.area, p.park]); + expect(c.matchGeometry('area').collection).to.include.members( + [p.area, p.residential, p.park, p.soccer, p.football] + ); }); }); describe('#search', function() { - it('filters presets by name', function() { - expect(c.search('resid', 'line').collection.indexOf(p.residential) >= 0).to.eql(true); + it('matches leading name', function() { + var col = c.search('resid', 'area').collection; + expect(col.indexOf(p.residential)).to.eql(0); // 1. 
'Residential' (by name) }); - it('is fuzzy', function() { - expect(c.search('rusid', 'line').collection.indexOf(p.residential) >= 0).to.eql(true); + it('returns alternate matches in correct order', function() { + var col = c.search('gri', 'point').matchGeometry('point').collection; + expect(col.indexOf(p.grill)).to.eql(0); // 1. 'Grill' (leading name) + expect(col.indexOf(p.football)).to.eql(1); // 2. 'Football' (leading term 'gridiron') + expect(col.indexOf(p.sandpit)).to.eql(2); // 3. 'Sandpit' (leading tag value 'grit_bin') + expect(col.indexOf(p.grass)).to.eql(3); // 4. 'Grass' (similar name 'grass') + expect(col.indexOf(p.park)).to.eql(4); // 5. 'Park' (similar term 'grass') + }); + + it.skip('considers diacritics on exact matches', function() { + }); + + it.skip('replaces diacritics on fuzzy matches', function() { }); it('includes the appropriate fallback preset', function() { - expect(c.search('blade of grass', 'point').collection.indexOf(p.point) >= 0).to.eql(true); - expect(c.search('blade of grass', 'area').collection.indexOf(p.area) >= 0).to.eql(true); + expect(c.search('foo', 'point').collection).to.include(p.point); + expect(c.search('foo', 'line').collection).to.include(p.line); + expect(c.search('foo', 'area').collection).to.include(p.area); }); it('excludes presets with searchable: false', function() { - var excluded = iD.presets.Preset('excluded', { - tags: {}, - geometry: [], + var excluded = iD.presets.Preset('__test/excluded', { + name: 'excluded', + tags: { amenity: 'excluded' }, + geometry: ['point'], searchable: false }), collection = iD.presets.Collection([excluded, p.point]);