Fixed issues and added new scripts to urlify.js
Added the following scripts: * Lithuanian (thanks to petraszd for the patch) * Serbian (thanks to offy) * Azerbaijani (thanks to Ali Ismayilov) Fixed the following issues: * In the Polish character map, some uppercase letters were converted to a lowercase ASCII equivalent. * The letter Y with a diaeresis had a lowercase version but no uppercase. The code was also simplified and cleaned up: * Use `var` statements everywhere * Systematic use of semicolons * Proper looping over arrays and objects. Thanks to @oinopion for his help in getting the JavaScript into proper shape. Fixes #8561, #11035.
This commit is contained in:
parent
b77f26313c
commit
20948612c7
|
@ -3,15 +3,15 @@ var LATIN_MAP = {
|
||||||
'C', 'È': 'E', 'É': 'E', 'Ê': 'E', 'Ë': 'E', 'Ì': 'I', 'Í': 'I', 'Î': 'I',
|
'C', 'È': 'E', 'É': 'E', 'Ê': 'E', 'Ë': 'E', 'Ì': 'I', 'Í': 'I', 'Î': 'I',
|
||||||
'Ï': 'I', 'Ð': 'D', 'Ñ': 'N', 'Ò': 'O', 'Ó': 'O', 'Ô': 'O', 'Õ': 'O', 'Ö':
|
'Ï': 'I', 'Ð': 'D', 'Ñ': 'N', 'Ò': 'O', 'Ó': 'O', 'Ô': 'O', 'Õ': 'O', 'Ö':
|
||||||
'O', 'Ő': 'O', 'Ø': 'O', 'Ù': 'U', 'Ú': 'U', 'Û': 'U', 'Ü': 'U', 'Ű': 'U',
|
'O', 'Ő': 'O', 'Ø': 'O', 'Ù': 'U', 'Ú': 'U', 'Û': 'U', 'Ü': 'U', 'Ű': 'U',
|
||||||
'Ý': 'Y', 'Þ': 'TH', 'ß': 'ss', 'à':'a', 'á':'a', 'â': 'a', 'ã': 'a', 'ä':
|
'Ý': 'Y', 'Þ': 'TH', 'Ÿ': 'Y', 'ß': 'ss', 'à':'a', 'á':'a', 'â': 'a', 'ã':
|
||||||
'a', 'å': 'a', 'æ': 'ae', 'ç': 'c', 'è': 'e', 'é': 'e', 'ê': 'e', 'ë': 'e',
|
'a', 'ä': 'a', 'å': 'a', 'æ': 'ae', 'ç': 'c', 'è': 'e', 'é': 'e', 'ê': 'e',
|
||||||
'ì': 'i', 'í': 'i', 'î': 'i', 'ï': 'i', 'ð': 'd', 'ñ': 'n', 'ò': 'o', 'ó':
|
'ë': 'e', 'ì': 'i', 'í': 'i', 'î': 'i', 'ï': 'i', 'ð': 'd', 'ñ': 'n', 'ò':
|
||||||
'o', 'ô': 'o', 'õ': 'o', 'ö': 'o', 'ő': 'o', 'ø': 'o', 'ù': 'u', 'ú': 'u',
|
'o', 'ó': 'o', 'ô': 'o', 'õ': 'o', 'ö': 'o', 'ő': 'o', 'ø': 'o', 'ù': 'u',
|
||||||
'û': 'u', 'ü': 'u', 'ű': 'u', 'ý': 'y', 'þ': 'th', 'ÿ': 'y'
|
'ú': 'u', 'û': 'u', 'ü': 'u', 'ű': 'u', 'ý': 'y', 'þ': 'th', 'ÿ': 'y'
|
||||||
}
|
};
|
||||||
var LATIN_SYMBOLS_MAP = {
|
var LATIN_SYMBOLS_MAP = {
|
||||||
'©':'(c)'
|
'©':'(c)'
|
||||||
}
|
};
|
||||||
var GREEK_MAP = {
|
var GREEK_MAP = {
|
||||||
'α':'a', 'β':'b', 'γ':'g', 'δ':'d', 'ε':'e', 'ζ':'z', 'η':'h', 'θ':'8',
|
'α':'a', 'β':'b', 'γ':'g', 'δ':'d', 'ε':'e', 'ζ':'z', 'η':'h', 'θ':'8',
|
||||||
'ι':'i', 'κ':'k', 'λ':'l', 'μ':'m', 'ν':'n', 'ξ':'3', 'ο':'o', 'π':'p',
|
'ι':'i', 'κ':'k', 'λ':'l', 'μ':'m', 'ν':'n', 'ξ':'3', 'ο':'o', 'π':'p',
|
||||||
|
@ -23,11 +23,11 @@ var GREEK_MAP = {
|
||||||
'Ρ':'R', 'Σ':'S', 'Τ':'T', 'Υ':'Y', 'Φ':'F', 'Χ':'X', 'Ψ':'PS', 'Ω':'W',
|
'Ρ':'R', 'Σ':'S', 'Τ':'T', 'Υ':'Y', 'Φ':'F', 'Χ':'X', 'Ψ':'PS', 'Ω':'W',
|
||||||
'Ά':'A', 'Έ':'E', 'Ί':'I', 'Ό':'O', 'Ύ':'Y', 'Ή':'H', 'Ώ':'W', 'Ϊ':'I',
|
'Ά':'A', 'Έ':'E', 'Ί':'I', 'Ό':'O', 'Ύ':'Y', 'Ή':'H', 'Ώ':'W', 'Ϊ':'I',
|
||||||
'Ϋ':'Y'
|
'Ϋ':'Y'
|
||||||
}
|
};
|
||||||
var TURKISH_MAP = {
|
var TURKISH_MAP = {
|
||||||
'ş':'s', 'Ş':'S', 'ı':'i', 'İ':'I', 'ç':'c', 'Ç':'C', 'ü':'u', 'Ü':'U',
|
'ş':'s', 'Ş':'S', 'ı':'i', 'İ':'I', 'ç':'c', 'Ç':'C', 'ü':'u', 'Ü':'U',
|
||||||
'ö':'o', 'Ö':'O', 'ğ':'g', 'Ğ':'G'
|
'ö':'o', 'Ö':'O', 'ğ':'g', 'Ğ':'G'
|
||||||
}
|
};
|
||||||
var RUSSIAN_MAP = {
|
var RUSSIAN_MAP = {
|
||||||
'а':'a', 'б':'b', 'в':'v', 'г':'g', 'д':'d', 'е':'e', 'ё':'yo', 'ж':'zh',
|
'а':'a', 'б':'b', 'в':'v', 'г':'g', 'д':'d', 'е':'e', 'ё':'yo', 'ж':'zh',
|
||||||
'з':'z', 'и':'i', 'й':'j', 'к':'k', 'л':'l', 'м':'m', 'н':'n', 'о':'o',
|
'з':'z', 'и':'i', 'й':'j', 'к':'k', 'л':'l', 'м':'m', 'н':'n', 'о':'o',
|
||||||
|
@ -39,92 +39,91 @@ var RUSSIAN_MAP = {
|
||||||
'П':'P', 'Р':'R', 'С':'S', 'Т':'T', 'У':'U', 'Ф':'F', 'Х':'H', 'Ц':'C',
|
'П':'P', 'Р':'R', 'С':'S', 'Т':'T', 'У':'U', 'Ф':'F', 'Х':'H', 'Ц':'C',
|
||||||
'Ч':'Ch', 'Ш':'Sh', 'Щ':'Sh', 'Ъ':'', 'Ы':'Y', 'Ь':'', 'Э':'E', 'Ю':'Yu',
|
'Ч':'Ch', 'Ш':'Sh', 'Щ':'Sh', 'Ъ':'', 'Ы':'Y', 'Ь':'', 'Э':'E', 'Ю':'Yu',
|
||||||
'Я':'Ya'
|
'Я':'Ya'
|
||||||
}
|
};
|
||||||
var UKRAINIAN_MAP = {
|
var UKRAINIAN_MAP = {
|
||||||
'Є':'Ye', 'І':'I', 'Ї':'Yi', 'Ґ':'G', 'є':'ye', 'і':'i', 'ї':'yi', 'ґ':'g'
|
'Є':'Ye', 'І':'I', 'Ї':'Yi', 'Ґ':'G', 'є':'ye', 'і':'i', 'ї':'yi', 'ґ':'g'
|
||||||
}
|
};
|
||||||
var CZECH_MAP = {
|
var CZECH_MAP = {
|
||||||
'č':'c', 'ď':'d', 'ě':'e', 'ň': 'n', 'ř':'r', 'š':'s', 'ť':'t', 'ů':'u',
|
'č':'c', 'ď':'d', 'ě':'e', 'ň': 'n', 'ř':'r', 'š':'s', 'ť':'t', 'ů':'u',
|
||||||
'ž':'z', 'Č':'C', 'Ď':'D', 'Ě':'E', 'Ň': 'N', 'Ř':'R', 'Š':'S', 'Ť':'T',
|
'ž':'z', 'Č':'C', 'Ď':'D', 'Ě':'E', 'Ň': 'N', 'Ř':'R', 'Š':'S', 'Ť':'T',
|
||||||
'Ů':'U', 'Ž':'Z'
|
'Ů':'U', 'Ž':'Z'
|
||||||
}
|
};
|
||||||
|
|
||||||
var POLISH_MAP = {
|
var POLISH_MAP = {
|
||||||
'ą':'a', 'ć':'c', 'ę':'e', 'ł':'l', 'ń':'n', 'ó':'o', 'ś':'s', 'ź':'z',
|
'ą':'a', 'ć':'c', 'ę':'e', 'ł':'l', 'ń':'n', 'ó':'o', 'ś':'s', 'ź':'z',
|
||||||
'ż':'z', 'Ą':'A', 'Ć':'C', 'Ę':'e', 'Ł':'L', 'Ń':'N', 'Ó':'o', 'Ś':'S',
|
'ż':'z', 'Ą':'A', 'Ć':'C', 'Ę':'E', 'Ł':'L', 'Ń':'N', 'Ó':'O', 'Ś':'S',
|
||||||
'Ź':'Z', 'Ż':'Z'
|
'Ź':'Z', 'Ż':'Z'
|
||||||
}
|
};
|
||||||
|
|
||||||
var LATVIAN_MAP = {
|
var LATVIAN_MAP = {
|
||||||
'ā':'a', 'č':'c', 'ē':'e', 'ģ':'g', 'ī':'i', 'ķ':'k', 'ļ':'l', 'ņ':'n',
|
'ā':'a', 'č':'c', 'ē':'e', 'ģ':'g', 'ī':'i', 'ķ':'k', 'ļ':'l', 'ņ':'n',
|
||||||
'š':'s', 'ū':'u', 'ž':'z', 'Ā':'A', 'Č':'C', 'Ē':'E', 'Ģ':'G', 'Ī':'i',
|
'š':'s', 'ū':'u', 'ž':'z', 'Ā':'A', 'Č':'C', 'Ē':'E', 'Ģ':'G', 'Ī':'I',
|
||||||
'Ķ':'k', 'Ļ':'L', 'Ņ':'N', 'Š':'S', 'Ū':'u', 'Ž':'Z'
|
'Ķ':'K', 'Ļ':'L', 'Ņ':'N', 'Š':'S', 'Ū':'U', 'Ž':'Z'
|
||||||
}
|
};
|
||||||
|
|
||||||
var ARABIC_MAP = {
|
var ARABIC_MAP = {
|
||||||
'أ':'a', 'ب':'b', 'ت':'t', 'ث': 'th', 'ج':'g', 'ح':'h', 'خ':'kh', 'د':'d',
|
'أ':'a', 'ب':'b', 'ت':'t', 'ث': 'th', 'ج':'g', 'ح':'h', 'خ':'kh', 'د':'d',
|
||||||
'ذ':'th', 'ر':'r', 'ز':'z', 'س':'s', 'ش':'sh', 'ص':'s', 'ض':'d', 'ط':'t',
|
'ذ':'th', 'ر':'r', 'ز':'z', 'س':'s', 'ش':'sh', 'ص':'s', 'ض':'d', 'ط':'t',
|
||||||
'ظ':'th', 'ع':'aa', 'غ':'gh', 'ف':'f', 'ق':'k', 'ك':'k', 'ل':'l', 'م':'m',
|
'ظ':'th', 'ع':'aa', 'غ':'gh', 'ف':'f', 'ق':'k', 'ك':'k', 'ل':'l', 'م':'m',
|
||||||
'ن':'n', 'ه':'h', 'و':'o', 'ي':'y'
|
'ن':'n', 'ه':'h', 'و':'o', 'ي':'y'
|
||||||
}
|
};
|
||||||
|
// Lithuanian accented letters mapped to their unaccented ASCII base letter.
// Lowercase keys map to lowercase values and uppercase keys to uppercase
// values; key order is kept as: all lowercase first, then all uppercase.
var LITHUANIAN_MAP = {
    // lowercase
    'ą': 'a',
    'č': 'c',
    'ę': 'e',
    'ė': 'e',
    'į': 'i',
    'š': 's',
    'ų': 'u',
    'ū': 'u',
    'ž': 'z',
    // uppercase
    'Ą': 'A',
    'Č': 'C',
    'Ę': 'E',
    'Ė': 'E',
    'Į': 'I',
    'Š': 'S',
    'Ų': 'U',
    'Ū': 'U',
    'Ž': 'Z'
};
|
||||||
|
// Serbian letters (Cyrillic, plus the Latin d-with-stroke) mapped to an ASCII
// transliteration. Lowercase keys map to lowercase values; uppercase keys map
// to a value whose first letter is capitalised.
var SERBIAN_MAP = {
    // lowercase
    'ђ': 'dj', 'ј': 'j', 'љ': 'lj', 'њ': 'nj', 'ћ': 'c', 'џ': 'dz', 'đ': 'dj',
    // uppercase -- capital Je now yields 'J'; it previously yielded lowercase
    // 'j', unlike every other uppercase entry in this map.
    'Ђ': 'Dj', 'Ј': 'J', 'Љ': 'Lj', 'Њ': 'Nj', 'Ћ': 'C', 'Џ': 'Dz', 'Đ': 'Dj'
};
|
||||||
|
// Azerbaijani (Latin alphabet) letters mapped to plain ASCII letters.
// Lowercase keys map to lowercase values and uppercase keys to uppercase
// values; key order is kept as: all lowercase first, then all uppercase.
var AZERBAIJANI_MAP = {
    // lowercase
    'ç': 'c',
    'ə': 'e',
    'ğ': 'g',
    'ı': 'i',
    'ö': 'o',
    'ş': 's',
    'ü': 'u',
    // uppercase
    'Ç': 'C',
    'Ə': 'E',
    'Ğ': 'G',
    'İ': 'I',
    'Ö': 'O',
    'Ş': 'S',
    'Ü': 'U'
};
|
||||||
|
|
||||||
var ALL_DOWNCODE_MAPS=new Array()
|
var ALL_DOWNCODE_MAPS = [
|
||||||
ALL_DOWNCODE_MAPS[0]=LATIN_MAP
|
LATIN_MAP,
|
||||||
ALL_DOWNCODE_MAPS[1]=LATIN_SYMBOLS_MAP
|
LATIN_SYMBOLS_MAP,
|
||||||
ALL_DOWNCODE_MAPS[2]=GREEK_MAP
|
GREEK_MAP,
|
||||||
ALL_DOWNCODE_MAPS[3]=TURKISH_MAP
|
TURKISH_MAP,
|
||||||
ALL_DOWNCODE_MAPS[4]=RUSSIAN_MAP
|
RUSSIAN_MAP,
|
||||||
ALL_DOWNCODE_MAPS[5]=UKRAINIAN_MAP
|
UKRAINIAN_MAP,
|
||||||
ALL_DOWNCODE_MAPS[6]=CZECH_MAP
|
CZECH_MAP,
|
||||||
ALL_DOWNCODE_MAPS[7]=POLISH_MAP
|
POLISH_MAP,
|
||||||
ALL_DOWNCODE_MAPS[8]=LATVIAN_MAP
|
LATVIAN_MAP,
|
||||||
ALL_DOWNCODE_MAPS[9]=ARABIC_MAP
|
ARABIC_MAP,
|
||||||
|
LITHUANIAN_MAP,
|
||||||
|
SERBIAN_MAP,
|
||||||
|
AZERBAIJANI_MAP
|
||||||
|
];
|
||||||
|
|
||||||
var Downcoder = new Object();
|
var Downcoder = {
|
||||||
Downcoder.Initialize = function()
|
'Initialize': function() {
|
||||||
{
|
if (Downcoder.map) { // already made
|
||||||
if (Downcoder.map) // already made
|
return;
|
||||||
return ;
|
|
||||||
Downcoder.map ={}
|
|
||||||
Downcoder.chars = '' ;
|
|
||||||
for(var i in ALL_DOWNCODE_MAPS)
|
|
||||||
{
|
|
||||||
var lookup = ALL_DOWNCODE_MAPS[i]
|
|
||||||
for (var c in lookup)
|
|
||||||
{
|
|
||||||
Downcoder.map[c] = lookup[c] ;
|
|
||||||
Downcoder.chars += c ;
|
|
||||||
}
|
}
|
||||||
}
|
Downcoder.map = {};
|
||||||
Downcoder.regex = new RegExp('[' + Downcoder.chars + ']|[^' + Downcoder.chars + ']+','g') ;
|
Downcoder.chars = [];
|
||||||
}
|
for (var i=0; i<ALL_DOWNCODE_MAPS.length; i++) {
|
||||||
|
var lookup = ALL_DOWNCODE_MAPS[i];
|
||||||
downcode= function( slug )
|
for (var c in lookup) {
|
||||||
{
|
if (lookup.hasOwnProperty(c)) {
|
||||||
Downcoder.Initialize() ;
|
Downcoder.map[c] = lookup[c];
|
||||||
var downcoded =""
|
|
||||||
var pieces = slug.match(Downcoder.regex);
|
|
||||||
if(pieces)
|
|
||||||
{
|
|
||||||
for (var i = 0 ; i < pieces.length ; i++)
|
|
||||||
{
|
|
||||||
if (pieces[i].length == 1)
|
|
||||||
{
|
|
||||||
var mapped = Downcoder.map[pieces[i]] ;
|
|
||||||
if (mapped != null)
|
|
||||||
{
|
|
||||||
downcoded+=mapped;
|
|
||||||
continue ;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
downcoded+=pieces[i];
|
|
||||||
}
|
}
|
||||||
|
for (var k in Downcoder.map) {
|
||||||
|
if (Downcoder.map.hasOwnProperty(k)) {
|
||||||
|
Downcoder.chars.push(k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Downcoder.regex = new RegExp(Downcoder.chars.join('|'), 'g');
|
||||||
}
|
}
|
||||||
else
|
};
|
||||||
{
|
|
||||||
downcoded = slug;
|
function downcode(slug) {
|
||||||
}
|
Downcoder.Initialize();
|
||||||
return downcoded;
|
return slug.replace(Downcoder.regex, function(m) {
|
||||||
|
return Downcoder.map[m];
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -132,11 +131,12 @@ function URLify(s, num_chars) {
|
||||||
// changes, e.g., "Petty theft" to "petty_theft"
|
// changes, e.g., "Petty theft" to "petty_theft"
|
||||||
// remove all these words from the string before urlifying
|
// remove all these words from the string before urlifying
|
||||||
s = downcode(s);
|
s = downcode(s);
|
||||||
removelist = ["a", "an", "as", "at", "before", "but", "by", "for", "from",
|
var removelist = [
|
||||||
"is", "in", "into", "like", "of", "off", "on", "onto", "per",
|
"a", "an", "as", "at", "before", "but", "by", "for", "from", "is",
|
||||||
"since", "than", "the", "this", "that", "to", "up", "via",
|
"in", "into", "like", "of", "off", "on", "onto", "per", "since",
|
||||||
"with"];
|
"than", "the", "this", "that", "to", "up", "via", "with"
|
||||||
r = new RegExp('\\b(' + removelist.join('|') + ')\\b', 'gi');
|
];
|
||||||
|
var r = new RegExp('\\b(' + removelist.join('|') + ')\\b', 'gi');
|
||||||
s = s.replace(r, '');
|
s = s.replace(r, '');
|
||||||
// if downcode doesn't hit, the char will be stripped here
|
// if downcode doesn't hit, the char will be stripped here
|
||||||
s = s.replace(/[^-\w\s]/g, ''); // remove unneeded chars
|
s = s.replace(/[^-\w\s]/g, ''); // remove unneeded chars
|
||||||
|
@ -145,4 +145,3 @@ function URLify(s, num_chars) {
|
||||||
s = s.toLowerCase(); // convert to lowercase
|
s = s.toLowerCase(); // convert to lowercase
|
||||||
return s.substring(0, num_chars);// trim to first num_chars chars
|
return s.substring(0, num_chars);// trim to first num_chars chars
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue