From cc51a2f038098d1dda8bc5a761ba9869cffe5191 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Minh=20Nguy=E1=BB=85n?= Date: Sun, 14 Jun 2015 10:18:04 -0700 Subject: [PATCH] Anchors in Wikipedia URLs Corrected several issues with section anchors in Wikipedia URLs: - When normalizing a pasted-in URL into a tag value, try to decode the anchor. - Also when normalizing into a tag value, recognize variant paths such as zh.wikipedia.org/zh-hk/. - When the tag value contains an anchor, encode it before adding it to the link. --- js/id/ui/preset/wikipedia.js | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/js/id/ui/preset/wikipedia.js b/js/id/ui/preset/wikipedia.js index c215587a3..dbc0f5217 100644 --- a/js/id/ui/preset/wikipedia.js +++ b/js/id/ui/preset/wikipedia.js @@ -84,12 +84,22 @@ iD.ui.preset.wikipedia = function(field, context) { function change() { var value = title.value(), - m = value.match(/https?:\/\/([-a-z]+)\.wikipedia\.org\/wiki\/(.+)/), - l = m && _.find(iD.data.wikipedia, function(d) { return m[1] === d[2]; }); + m = value.match(/https?:\/\/([-a-z]+)\.wikipedia\.org\/(?:wiki|\1-[-a-z]+)\/([^#]+)(?:#(.+))?/), + l = m && _.find(iD.data.wikipedia, function(d) { return m[1] === d[2]; }), + anchor; if (l) { // Normalize title http://www.mediawiki.org/wiki/API:Query#Title_normalization - value = m[2].replace(/_/g, ' '); + value = decodeURIComponent(m[2]).replace(/_/g, ' '); + if (m[3]) { + try { + // Best-effort `anchordecode:` implementation + anchor = decodeURIComponent(m[3].replace(/\.([0-9A-F]{2})/g, '%$1')); + } catch (e) { + anchor = decodeURIComponent(m[3]); + } + value += "#" + anchor.replace(/_/g, ' '); + } value = value.slice(0, 1).toUpperCase() + value.slice(1); lang.value(l[1]); title.value(value); @@ -102,14 +112,22 @@ iD.ui.preset.wikipedia = function(field, context) { i.tags = function(tags) { var value = tags[field.key] || '', - m = value.match(/([^:]+):(.+)/), - l = m && _.find(iD.data.wikipedia, function(d) { return m[1] === d[2]; }); + m = value.match(/([^:]+):([^#]+)(?:#(.+))?/), + l = m && _.find(iD.data.wikipedia, function(d) { return m[1] === d[2]; }), + anchor = m && m[3]; // value in correct format if (l) { lang.value(l[1]); - title.value(m[2]); - link.attr('href', 'http://' + m[1] + '.wikipedia.org/wiki/' + m[2]); + title.value(m[2] + (anchor ? ('#' + anchor) : '')); + try { + // Best-effort `anchorencode:` implementation + anchor = encodeURIComponent(anchor.replace(/ /g, '_')).replace(/%/g, '.'); + } catch (e) { + anchor = anchor.replace(/ /g, '_'); + } + link.attr('href', 'http://' + m[1] + '.wikipedia.org/wiki/' + + m[2].replace(/ /g, '_') + (anchor ? ('#' + anchor) : '')); // unrecognized value format } else {