正则表达式,用于将其他语言的数字更改为英文数字

时间:2012-08-29 04:55:11

标签: javascript regex unicode

我想检查一个数字是否是用阿拉伯文数字书写的,并将其中的所有数字替换为英文(ASCII)数字,以便我可以对其进行计算。我正在尝试下面这个函数,但我不知道该使用什么正则表达式:

// Sketch from the question: for each i from 0 to 9, replace whatever the
// pattern matches in `num` with the digit i, then return the result.
// Not runnable as written, because the pattern is still a placeholder.
function enNumber(num)
{
for (var i=0;i<10;i++)
// *[regular expression]* stands in for the pattern the question asks about.
num=num.replace(*[regular expression]*,i);
return num;}

1 个答案:

答案 0 :(得分:2)

针对所提问题的最简解决方案

这可以解决你的要求:

/**
 * Replaces Arabic-Indic (U+0660-U+0669) and Extended Arabic-Indic
 * (U+06F0-U+06F9) digits in a string with the matching ASCII digits.
 * All other characters are left untouched.
 *
 * @param {string} arabic - Text that may contain Arabic-Indic digits.
 * @returns {string} The text with those digits converted to '0'-'9'.
 */
function fromArabicToASCII(arabic) {
  var ASCII_ZERO = 48;
  var arabicDigits = /[\u0660-\u0669\u06F0-\u06F9]/g;

  // Both ranges start on a multiple of 16, so the low four bits of the
  // code unit are the digit's numeric value.
  function toAsciiDigit(ch) {
    var value = ch.charCodeAt(0) % 16;
    return String.fromCharCode(ASCII_ZERO + value);
  }

  return arabic.replace(arabicDigits, toAsciiDigit);
}

适用于Unicode类Nd(Number,Decimal Digit)中所有已知数字的通用函数

下面的函数支持Unicode类Nd(Number,Decimal Digit)中的以下数字: Adlam,Ahom,Arabic-indic,Balinese,Bengali,Bhaiksuki,Brahmi,Chakma,Cham,Devanagari,Extended Arabic-indic,Fullwidth,Gujarati,Gurmukhi,Javanese,Kannada,Kayah Li,Khmer,Khudawadi,Lao,Lepcha,Limbu,Malayalam,Mathematical Bold,Mathematical Double-struck,Mathematical Monospace,Mathematical Sans-serif Bold,Mathematical Sans-serif,Meetei Mayek,Modi,Mongolian,Mro,Myanmar Shan,Myanmar Tai Laing,Myanmar,New Tai Lue,Newa,Nko,Ol Chiki,Oriya,Osmanya,Pahawh Hmong,Saurashtra,Sharada,Sinhala Lith,Sora Sompeng,Sundanese,Tai Tham Hora,Tai Tham Tham,Takri,Tamil,Telugu,Thai,Tibetan,Tirhuta,Vai,Warang Citi。

不支持罗马数字和其他非十进制数字,因为它们不是十进制数。

// Converts every supported digit of Unicode class 'Nd' (Number, Decimal
// Digit) found in a UTF-16 encoded string into the equivalent ASCII
// digit, and returns the resulting string. Other characters are kept.
// Source : http://stackoverflow.com/a/12171250/36866
// License: MIT
// Author : some@domain.name
// Note   : If you are going to use this code I would appreciate to
//          get an email to some@domain.name. You don't have to but
//          it would make me happier!
var digitsToASCII =
  (function () {
    var ASCII_ZERO = 48;

    // Most Unicode digit ranges place their zero digit on a code unit
    // whose four least significant bits are either zero or six. The
    // Math block is the exception: several digit sets follow each other
    // back to back. Each family gets its own capture group, so the
    // replacer knows which decoding rule applies without a lookup table.

    // Family 1: zero digit has low nibble 0.
    var nibbleZeroSource = [
      '[',
        '\u0030-\u0039\u0660-\u0669\u06F0-\u06F9\u07C0-\u07C9',
        '\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29\u1040-\u1049',
        '\u1090-\u1099\u17E0-\u17E9\u1810-\u1819\u19D0-\u19D9',
        '\u1A80-\u1A89\u1A90-\u1A99\u1B50-\u1B59\u1BB0-\u1BB9',
        '\u1C40-\u1C49\u1C50-\u1C59\uA620-\uA629\uA8D0-\uA8D9',
        '\uA900-\uA909\uA9D0-\uA9D9\uA9F0-\uA9F9\uAA50-\uAA59',
        '\uABF0-\uABF9\uFF10-\uFF19',
      ']',
      '|\uD801[\uDCA0-\uDCA9]',
      '|\uD804[\uDCF0-\uDCF9\uDDD0-\uDDD9\uDEF0-\uDEF9]',
      '|\uD805[',
        '\uDC50-\uDC59\uDCD0-\uDCD9\uDE50-\uDE59',
        '\uDEC0-\uDEC9\uDF30-\uDF39',
      ']',
      '|\uD806[\uDCE0-\uDCE9]|\uD807[\uDC50-\uDC59]',
      '|\uD81A[\uDE60-\uDE69]|\uD81A[\uDF50-\uDF59]',
      '|\uD83A[\uDD50-\uDD59]'
    ].join('');

    // Family 2: zero digit has low nibble 6.
    var nibbleSixSource = [
      '[',
        '\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF',
        '\u0B66-\u0B6F\u0BE6-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF',
        '\u0D66-\u0D6F\u0DE6-\u0DEF\u1946-\u194F',
      ']',
      '|\uD804[\uDC66-\uDC6F\uDD36-\uDD3F]'
    ].join('');

    // Family 3: mathematical digits, five consecutive sets of ten.
    var mathSource = '\uD835[\uDFCE-\uDFFF]';

    var anyDigit = new RegExp(
      '(' + nibbleZeroSource + ')|(' + nibbleSixSource + ')|(' + mathSource + ')',
      'g'
    );

    // Replacer for String.prototype.replace: exactly one of the three
    // group arguments is set, identifying the family that matched.
    function toAsciiDigit(matched, inNibbleZero, inNibbleSix, inMath) {
      // A match may be a surrogate pair (two code units). The last code
      // unit alone carries the digit value, so only that one is decoded.
      var unit = matched.charCodeAt(matched.length - 1);
      var value;

      if (inNibbleZero) {
        value = unit & 0xF; // low 4 bits are the digit
      } else if (inNibbleSix) {
        value = (unit - 6) & 0xF; // shift zero down to nibble 0 first
      } else if (inMath) {
        value = ((unit - 0xCE) & 0x3F) % 10; // index within block, mod 10
      } else {
        value = null;
      }

      return String.fromCharCode(ASCII_ZERO + value);
    }

    return function replaceDigits(input) {
      return input.replace(anyDigit, toAsciiDigit);
    };
  })();

用法:

// Usage example: pass the string containing foreign digits to
// digitsToASCII and keep the string it returns.
myAsciiVariable = digitsToASCII( myForeignVariable );

node.js的模块

现在,在名为unicodedigits的node.js模块中可以使用与此类似的功能。该功能可以从任何支持的数字转换为ASCII或任何支持的范围。

您可以使用npm install unicodedigits --save进行安装,也可以在github.com/somec/unicodedigits上找到它。