我想检查一个数字是否是用阿拉伯文数字书写的,并将其中的所有数字替换为英文(ASCII)数字,以便对其进行计算。我正在尝试下面这个函数,但不知道该使用什么正则表达式:
/**
 * Replaces Arabic-Indic (U+0660-U+0669) and Extended Arabic-Indic
 * (U+06F0-U+06F9) digits in a string with their ASCII equivalents.
 *
 * @param {string} num - Text that may contain Arabic digits.
 * @returns {string} The text with every Arabic digit replaced by '0'-'9'.
 */
function enNumber(num)
{
  var result = num;
  for (var i = 0; i < 10; i++) {
    // Both digit blocks are contiguous and start at zero, so the digit with
    // value i sits at base + i. The 'g' flag is required: without it only
    // the first occurrence of each digit would be replaced.
    var pattern = new RegExp(
      '[' + String.fromCharCode(0x0660 + i, 0x06F0 + i) + ']',
      'g'
    );
    result = result.replace(pattern, String(i));
  }
  return result;
}
答案 0 :(得分:2)
这可以解决你的要求:
/**
 * Converts Arabic-Indic and Extended Arabic-Indic digits to ASCII digits.
 *
 * @param {string} arabic - Text that may contain Arabic digits.
 * @returns {string} The text with each matched digit replaced by '0'-'9'.
 */
function fromArabicToASCII(arabic) {
  var arabicDigits = /[\u0660-\u0669\u06F0-\u06F9]/g;

  // Both ranges start at a code point whose low four bits are zero, so the
  // low nibble of the code unit is the digit's numeric value.
  function toAsciiDigit(ch) {
    var value = ch.charCodeAt(0) & 15;
    return String.fromCharCode(48 + value);
  }

  return arabic.replace(arabicDigits, toAsciiDigit);
}
下面的函数支持 Unicode 类别 Nd(Number, Decimal Digit,十进制数字)中的以下数字: Adlam、Ahom、Arabic-Indic、Balinese、Bengali、Bhaiksuki、Brahmi、Chakma、Cham、Devanagari、Extended Arabic-Indic、Fullwidth、Gujarati、Gurmukhi、Javanese、Kannada、Kayah Li、Khmer、Khudawadi、Lao、Lepcha、Limbu、Malayalam、Mathematical Bold、Mathematical Double-struck、Mathematical Monospace、Mathematical Sans-serif Bold、Mathematical Sans-serif、Meetei Mayek、Modi、Mongolian、Mro、Myanmar Shan、Myanmar Tai Laing、Myanmar、New Tai Lue、Newa、Nko、Ol Chiki、Oriya、Osmanya、Pahawh Hmong、Saurashtra、Sharada、Sinhala Lith、Sora Sompeng、Sundanese、Tai Tham Hora、Tai Tham Tham、Takri、Tamil、Telugu、Thai、Tibetan、Tirhuta、Vai、Warang Citi。
不支持罗马数字和其他非十进制数字,因为它们不是十进制数。
// This function takes a UTF-16 encoded string as input,
// and returns it with all supported digits from Unicode
// class 'Nd' (Number, Decimal Digit) replaced with their
// equivalent ASCII digit.
// Source : http://stackoverflow.com/a/12171250/36866
// License: MIT
// Author : some@domain.name
// Note : If you are going to use this code I would appreciate to
// get an email to some@domain.name. You don't have to but
// it would make me happier!
var digitsToASCII = (function () {
  // The pattern has three capture groups, one per decoding rule, so the
  // replacer can tell from the populated group how to turn a code unit into
  // a digit value without a lookup table. Astral digits are written as
  // explicit surrogate pairs because the pattern works on UTF-16 code units.

  // Group 1: scripts whose zero digit sits on a code point ending in 0x0.
  var zeroAlignedDigits = [
    '[' +
      '\u0030-\u0039\u0660-\u0669\u06F0-\u06F9\u07C0-\u07C9' +
      '\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29\u1040-\u1049' +
      '\u1090-\u1099\u17E0-\u17E9\u1810-\u1819\u19D0-\u19D9' +
      '\u1A80-\u1A89\u1A90-\u1A99\u1B50-\u1B59\u1BB0-\u1BB9' +
      '\u1C40-\u1C49\u1C50-\u1C59\uA620-\uA629\uA8D0-\uA8D9' +
      '\uA900-\uA909\uA9D0-\uA9D9\uA9F0-\uA9F9\uAA50-\uAA59' +
      '\uABF0-\uABF9\uFF10-\uFF19' +
    ']',
    '\uD801[\uDCA0-\uDCA9]',
    '\uD804[\uDCF0-\uDCF9\uDDD0-\uDDD9\uDEF0-\uDEF9]',
    '\uD805[' +
      '\uDC50-\uDC59\uDCD0-\uDCD9\uDE50-\uDE59' +
      '\uDEC0-\uDEC9\uDF30-\uDF39' +
    ']',
    '\uD806[\uDCE0-\uDCE9]',
    '\uD807[\uDC50-\uDC59]',
    '\uD81A[\uDE60-\uDE69]',
    '\uD81A[\uDF50-\uDF59]',
    '\uD83A[\uDD50-\uDD59]'
  ].join('|');

  // Group 2: scripts whose zero digit sits on a code point ending in 0x6.
  var sixAlignedDigits = [
    '[' +
      '\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF' +
      '\u0B66-\u0B6F\u0BE6-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF' +
      '\u0D66-\u0D6F\u0DE6-\u0DEF\u1946-\u194F' +
    ']',
    '\uD804[\uDC66-\uDC6F\uDD36-\uDD3F]'
  ].join('|');

  // Group 3: the mathematical digit styles, stored back to back as
  // consecutive runs of ten code points starting at low surrogate 0xDFCE.
  var mathDigits = '\uD835[\uDFCE-\uDFFF]';

  var digitPattern = new RegExp(
    '(' + zeroAlignedDigits + ')|(' + sixAlignedDigits + ')|(' + mathDigits + ')',
    'g'
  );

  // Replacer for String.prototype.replace: receives the full match followed
  // by the three capture groups, of which exactly one is populated.
  function toAsciiDigit(matched, zeroAligned, sixAligned, math) {
    // A match may be a surrogate pair; the digit value is always encoded in
    // the final code unit, so only that one is inspected.
    var unit = matched.charCodeAt(matched.length - 1);
    var value;

    if (zeroAligned) {
      value = unit & 0xF; // low nibble is the digit
    } else if (sixAligned) {
      value = (unit - 6) & 0xF; // shift zero down to nibble 0 first
    } else if (math) {
      value = ((unit - 0xCE) & 0x3F) % 10; // position within the run of ten
    } else {
      value = null;
    }

    return String.fromCharCode(48 + value); // 48 is the code of ASCII '0'
  }

  /**
   * @param {string} input - Text that may contain non-ASCII decimal digits.
   * @returns {string} The text with every supported digit replaced by '0'-'9'.
   */
  return function replaceDigits(input) {
    return input.replace(digitPattern, toAsciiDigit);
  };
})();
用法:
// Example call: digitsToASCII returns the input string with every supported
// Unicode decimal digit replaced by its ASCII equivalent.
myAsciiVariable = digitsToASCII( myForeignVariable );
现在,名为 unicodedigits 的 node.js 模块提供了与此类似的功能。该模块可以将任何受支持的数字转换为 ASCII 数字,或转换为任何其他受支持的数字范围。
您可以使用 npm install unicodedigits --save 进行安装,也可以在 github.com/somec/unicodedigits 上找到它。