From 762307bd7dc25980e153eaa8f6583833327ed669 Mon Sep 17 00:00:00 2001 From: Quincy Morgan Date: Tue, 9 Jun 2020 15:41:15 -0400 Subject: [PATCH] Restrict key, value, and role character limits based on unicode characters, not UTF-16 code units (re: #6817) --- modules/core/context.js | 4 +- modules/osm/entity.js | 9 ++-- modules/ui/commit.js | 30 +++++------ modules/ui/fields/combo.js | 6 +-- modules/ui/fields/wikidata.js | 5 +- modules/ui/fields/wikipedia.js | 4 +- modules/ui/sections/raw_tag_editor.js | 6 +-- modules/util/index.js | 2 + modules/util/util.js | 10 ++++ test/spec/util/util.js | 78 +++++++++++++++++++++++++++ 10 files changed, 123 insertions(+), 31 deletions(-) diff --git a/modules/core/context.js b/modules/core/context.js index df4f54146..ed4bf64ef 100644 --- a/modules/core/context.js +++ b/modules/core/context.js @@ -191,11 +191,9 @@ export function coreContext() { return context; }; - + // String length limits in Unicode characters, not JavaScript UTF-16 code units context.maxCharsForTagKey = () => 255; - context.maxCharsForTagValue = () => 255; - context.maxCharsForRelationRole = () => 255; diff --git a/modules/osm/entity.js b/modules/osm/entity.js index 647fb5910..e10eb3784 100644 --- a/modules/osm/entity.js +++ b/modules/osm/entity.js @@ -1,6 +1,7 @@ import { debug } from '../index'; import { osmIsInterestingTag } from './tags'; -import { utilArrayUnion } from '../util'; +import { utilArrayUnion } from '../util/array'; +import { utilUnicodeCharsTruncated } from '../util/util'; export function osmEntity(attrs) { @@ -149,8 +150,10 @@ osmEntity.prototype = { merged[k] = t2; } else if (t1 !== t2) { changed = true; - merged[k] = utilArrayUnion(t1.split(/;\s*/), t2.split(/;\s*/)).join(';') - .substr(0, 255); // avoid exceeding character limit; see also services/osm.js -> maxCharsForTagValue() + merged[k] = utilUnicodeCharsTruncated( + utilArrayUnion(t1.split(/;\s*/), t2.split(/;\s*/)).join(';'), + 255 // avoid exceeding character limit; see also 
services/osm.js -> maxCharsForTagValue() + ); } } return changed ? this.update({ tags: merged }) : this; diff --git a/modules/ui/commit.js b/modules/ui/commit.js index 52fc0c19b..246535dab 100644 --- a/modules/ui/commit.js +++ b/modules/ui/commit.js @@ -12,7 +12,7 @@ import { uiChangesetEditor } from './changeset_editor'; import { uiSectionChanges } from './sections/changes'; import { uiCommitWarnings } from './commit_warnings'; import { uiSectionRawTagEditor } from './sections/raw_tag_editor'; -import { utilArrayGroupBy, utilRebind, utilUniqueDomId } from '../util'; +import { utilArrayGroupBy, utilRebind, utilUnicodeCharsTruncated, utilUniqueDomId } from '../util'; import { utilDetect } from '../util/detect'; @@ -92,9 +92,9 @@ export function uiCommit(context) { var detected = utilDetect(); var tags = { comment: prefs('comment') || '', - created_by: ('iD ' + context.version).substr(0, tagCharLimit), - host: detected.host.substr(0, tagCharLimit), - locale: localizer.localeCode().substr(0, tagCharLimit) + created_by: utilUnicodeCharsTruncated('iD ' + context.version, tagCharLimit), + host: utilUnicodeCharsTruncated(detected.host, tagCharLimit), + locale: utilUnicodeCharsTruncated(localizer.localeCode(), tagCharLimit) }; // call findHashtags initially - this will remove stored @@ -126,7 +126,7 @@ export function uiCommit(context) { } }); - tags.source = sources.join(';').substr(0, tagCharLimit); + tags.source = utilUnicodeCharsTruncated(sources.join(';'), tagCharLimit); } context.changeset = new osmChangeset({ tags: tags }); @@ -144,31 +144,31 @@ export function uiCommit(context) { var tags = Object.assign({}, context.changeset.tags); // shallow copy // assign tags for imagery used - var imageryUsed = context.history().imageryUsed().join(';').substr(0, tagCharLimit); + var imageryUsed = utilUnicodeCharsTruncated(context.history().imageryUsed().join(';'), tagCharLimit); tags.imagery_used = imageryUsed || 'None'; // assign tags for closed issues and notes var osmClosed 
= osm.getClosedIDs(); var itemType; if (osmClosed.length) { - tags['closed:note'] = osmClosed.join(';').substr(0, tagCharLimit); + tags['closed:note'] = utilUnicodeCharsTruncated(osmClosed.join(';'), tagCharLimit); } if (services.keepRight) { var krClosed = services.keepRight.getClosedIDs(); if (krClosed.length) { - tags['closed:keepright'] = krClosed.join(';').substr(0, tagCharLimit); + tags['closed:keepright'] = utilUnicodeCharsTruncated(krClosed.join(';'), tagCharLimit); } } if (services.improveOSM) { var iOsmClosed = services.improveOSM.getClosedCounts(); for (itemType in iOsmClosed) { - tags['closed:improveosm:' + itemType] = iOsmClosed[itemType].toString().substr(0, tagCharLimit); + tags['closed:improveosm:' + itemType] = utilUnicodeCharsTruncated(iOsmClosed[itemType].toString(), tagCharLimit); } } if (services.osmose) { var osmoseClosed = services.osmose.getClosedCounts(); for (itemType in osmoseClosed) { - tags['closed:osmose:' + itemType] = osmoseClosed[itemType].toString().substr(0, tagCharLimit); + tags['closed:osmose:' + itemType] = utilUnicodeCharsTruncated(osmoseClosed[itemType].toString(), tagCharLimit); } } @@ -187,10 +187,10 @@ export function uiCommit(context) { var issuesBySubtype = utilArrayGroupBy(issuesOfType, 'subtype'); for (var issueSubtype in issuesBySubtype) { var issuesOfSubtype = issuesBySubtype[issueSubtype]; - tags[prefix + ':' + issueType + ':' + issueSubtype] = issuesOfSubtype.length.toString().substr(0, tagCharLimit); + tags[prefix + ':' + issueType + ':' + issueSubtype] = utilUnicodeCharsTruncated(issuesOfSubtype.length.toString(), tagCharLimit); } } else { - tags[prefix + ':' + issueType] = issuesOfType.length.toString().substr(0, tagCharLimit); + tags[prefix + ':' + issueType] = utilUnicodeCharsTruncated(issuesOfType.length.toString(), tagCharLimit); } } } @@ -550,14 +550,14 @@ export function uiCommit(context) { Object.keys(changed).forEach(function(k) { var v = changed[k]; - k = k.trim().substr(0, tagCharLimit); + k = 
utilUnicodeCharsTruncated(k.trim(), tagCharLimit); if (readOnlyTags.indexOf(k) !== -1) return; if (k !== '' && v !== undefined) { if (onInput) { tags[k] = v; } else { - tags[k] = v.trim().substr(0, tagCharLimit); + tags[k] = utilUnicodeCharsTruncated(v.trim(), tagCharLimit); } } else { delete tags[k]; @@ -569,7 +569,7 @@ export function uiCommit(context) { var commentOnly = changed.hasOwnProperty('comment') && (changed.comment !== ''); var arr = findHashtags(tags, commentOnly); if (arr.length) { - tags.hashtags = arr.join(';').substr(0, tagCharLimit); + tags.hashtags = utilUnicodeCharsTruncated(arr.join(';'), tagCharLimit); prefs('hashtags', tags.hashtags); } else { delete tags.hashtags; diff --git a/modules/ui/fields/combo.js b/modules/ui/fields/combo.js index a2fda00fa..e0a8d26fe 100644 --- a/modules/ui/fields/combo.js +++ b/modules/ui/fields/combo.js @@ -9,7 +9,7 @@ import { osmEntity } from '../../osm/entity'; import { t } from '../../core/localizer'; import { services } from '../../services'; import { uiCombobox } from '../combobox'; -import { utilArrayUniq, utilGetSetValue, utilNoAuto, utilRebind } from '../../util'; +import { utilArrayUniq, utilGetSetValue, utilNoAuto, utilRebind, utilUnicodeCharsCount } from '../../util'; export { uiFieldCombo as uiFieldMultiCombo, @@ -448,7 +448,7 @@ export function uiFieldCombo(field, context) { field.keys = _multiData.map(function(d) { return d.key; }); // limit the input length so it fits after prepending the key prefix - maxLength = context.maxCharsForTagKey() - field.key.length; + maxLength = context.maxCharsForTagKey() - utilUnicodeCharsCount(field.key); } else if (isSemi) { @@ -480,7 +480,7 @@ export function uiFieldCombo(field, context) { }; }); - var currLength = commonValues.join(';').length; + var currLength = utilUnicodeCharsCount(commonValues.join(';')); // limit the input length to the remaining available characters maxLength = context.maxCharsForTagValue() - currLength; diff --git 
a/modules/ui/fields/wikidata.js b/modules/ui/fields/wikidata.js index e2a2311e6..e27316558 100644 --- a/modules/ui/fields/wikidata.js +++ b/modules/ui/fields/wikidata.js @@ -14,7 +14,8 @@ import { svgIcon } from '../../svg/icon'; import { utilGetSetValue, utilNoAuto, - utilRebind + utilRebind, + utilUnicodeCharsTruncated } from '../../util'; import { t } from '../../core/localizer'; @@ -236,7 +237,7 @@ export function uiFieldWikidata(field, context) { } if (newWikipediaValue) { - newWikipediaValue = newWikipediaValue.substr(0, context.maxCharsForTagValue()); + newWikipediaValue = utilUnicodeCharsTruncated(newWikipediaValue, context.maxCharsForTagValue()); } if (typeof newWikipediaValue === 'undefined') return; diff --git a/modules/ui/fields/wikipedia.js b/modules/ui/fields/wikipedia.js index a43dcea6b..ad62f32a3 100644 --- a/modules/ui/fields/wikipedia.js +++ b/modules/ui/fields/wikipedia.js @@ -7,7 +7,7 @@ import { actionChangeTags } from '../../actions/change_tags'; import { services } from '../../services/index'; import { svgIcon } from '../../svg/icon'; import { uiCombobox } from '../combobox'; -import { utilGetSetValue, utilNoAuto, utilRebind } from '../../util'; +import { utilGetSetValue, utilNoAuto, utilRebind, utilUnicodeCharsTruncated } from '../../util'; export function uiFieldWikipedia(field, context) { @@ -192,7 +192,7 @@ export function uiFieldWikipedia(field, context) { } if (value) { - syncTags.wikipedia = (language()[2] + ':' + value).substr(0, context.maxCharsForTagValue()); + syncTags.wikipedia = utilUnicodeCharsTruncated(language()[2] + ':' + value, context.maxCharsForTagValue()); } else { syncTags.wikipedia = undefined; } diff --git a/modules/ui/sections/raw_tag_editor.js b/modules/ui/sections/raw_tag_editor.js index 3ef28deb8..643429f3b 100644 --- a/modules/ui/sections/raw_tag_editor.js +++ b/modules/ui/sections/raw_tag_editor.js @@ -9,7 +9,7 @@ import { uiTagReference } from '../tag_reference'; import { prefs } from '../../core/preferences'; 
/**
 * Counts the Unicode characters (code points) in a string.
 *
 * The `String.prototype.length` property reports UTF-16 code units, so any
 * character outside the Basic Multilingual Plane (most emoji, for example)
 * is counted twice there. Converting the string to an array iterates it by
 * code point instead, giving one element per Unicode character.
 *
 * Combining marks, variation selectors and zero-width joiners are code points
 * of their own, so each of them is counted separately.
 *
 * @param {string} str - text to measure
 * @returns {number} number of Unicode code points in `str`
 */
export function utilUnicodeCharsCount(str) {
    return Array.from(str).length;
}

/**
 * Returns at most the first `limit` Unicode characters (code points) of a
 * string, never cutting a surrogate pair in half.
 *
 * @param {string} str - text to truncate
 * @param {number} [limit] - maximum number of code points to keep; values
 *     below zero are treated as zero, and omitting it keeps the whole string
 * @returns {string} the leading `limit` code points of `str`
 */
export function utilUnicodeCharsTruncated(str, limit) {
    // `slice` interprets a negative end index as an offset from the end of the
    // array, which would hand back more than `limit` characters. Clamp at zero
    // so a negative limit yields an empty string, as `substr(0, n)` did.
    var end = limit === undefined ? undefined : Math.max(0, limit);
    return Array.from(str).slice(0, end).join('');
}
expect(iD.utilUnicodeCharsTruncated('Ĺo͂řȩm̅', 0)).to.eql(''); + expect(iD.utilUnicodeCharsTruncated('Ĺo͂řȩm̅', 3)).to.eql('Ĺo͂'); + expect(iD.utilUnicodeCharsTruncated('Ĺo͂řȩm̅', 7)).to.eql('Ĺo͂řȩm̅'); + expect(iD.utilUnicodeCharsTruncated('Ĺo͂řȩm̅', 255)).to.eql('Ĺo͂řȩm̅'); + }); + it('truncates Korean text', function() { + expect(iD.utilUnicodeCharsTruncated('뎌쉐', 0)).to.eql(''); + expect(iD.utilUnicodeCharsTruncated('뎌쉐', 1)).to.eql('뎌'); + expect(iD.utilUnicodeCharsTruncated('뎌쉐', 2)).to.eql('뎌쉐'); + expect(iD.utilUnicodeCharsTruncated('뎌쉐', 255)).to.eql('뎌쉐'); + }); + it('truncates Hindi text with combining marks', function() { + expect(iD.utilUnicodeCharsTruncated('अनुच्छेद', 0)).to.eql(''); + expect(iD.utilUnicodeCharsTruncated('अनुच्छेद', 3)).to.eql('अनु'); + expect(iD.utilUnicodeCharsTruncated('अनुच्छेद', 8)).to.eql('अनुच्छेद'); + expect(iD.utilUnicodeCharsTruncated('अनुच्छेद', 255)).to.eql('अनुच्छेद'); + }); + it('truncates demonic multiple combining marks', function() { + expect(iD.utilUnicodeCharsTruncated('Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘!͖̬̰̙̗̿̋ͥͥ̂ͣ̐́́͜͞', 0)).to.eql(''); + expect(iD.utilUnicodeCharsTruncated('Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘!͖', 59)).to.eql('Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘!͖'); + expect(iD.utilUnicodeCharsTruncated('Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘!͖̬̰̙̗̿̋ͥͥ̂ͣ̐́́͜͞', 74)).to.eql('Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘!͖̬̰̙̗̿̋ͥͥ̂ͣ̐́́͜͞'); + expect(iD.utilUnicodeCharsTruncated('Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘!͖̬̰̙̗̿̋ͥͥ̂ͣ̐́́͜͞', 255)).to.eql('Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘!͖̬̰̙̗̿̋ͥͥ̂ͣ̐́́͜͞'); + }); + it('truncates emoji', function() { + expect(iD.utilUnicodeCharsTruncated('😎', 0)).to.eql(''); + expect(iD.utilUnicodeCharsTruncated('😎', 1)).to.eql('😎'); + expect(iD.utilUnicodeCharsTruncated('🇨🇦', 1)).to.eql('🇨'); + expect(iD.utilUnicodeCharsTruncated('🏳️‍🌈', 
2)).to.eql('🏳️'); + expect(iD.utilUnicodeCharsTruncated('‍👩‍👩‍👧‍👧', 4)).to.eql('‍👩‍👩'); + expect(iD.utilUnicodeCharsTruncated('👩‍❤️‍💋‍👩', 6)).to.eql('👩‍❤️‍💋'); + expect(iD.utilUnicodeCharsTruncated('😎😬😆😵😴😄🙂🤔', 0)).to.eql(''); + expect(iD.utilUnicodeCharsTruncated('😎😬😆😵😴😄🙂🤔', 4)).to.eql('😎😬😆😵'); + expect(iD.utilUnicodeCharsTruncated('😎😬😆😵😴😄🙂🤔', 8)).to.eql('😎😬😆😵😴😄🙂🤔'); + expect(iD.utilUnicodeCharsTruncated('😎😬😆😵😴😄🙂🤔', 255)).to.eql('😎😬😆😵😴😄🙂🤔'); + }); + }); });