解码URL中的非英文字母(javascript)

时间:2017-10-12 15:55:17

标签: javascript decodeuricomponent

我的 HTML 页面的 URL 参数中带有包含非英文字符的文本,例如:http://www.example.com/player.html?name=%e4%e1%ef

我需要将其解码为name =הבן

我试过:

unescape(url) - get wrong characters: name=äáï (maybe from another charset)
//and
decodeURIComponent(url) - get Malformed URI exception
// even found some solutions like
decodeURIComponent((url+'').replace(/\+/g, '%20')) - exception too

任何解决方案?如何使用正确的字符集进行解码?

谢谢

我原本希望有更简单的方法,但得到的建议都是去修改网站本身,于是我用了一种不优雅但有效的办法。我的解决方案:

/**
 * Decode percent-escaped ISO-8859-8 Hebrew letters (and spaces) in a string.
 *
 * Escapes %E0 through %FA are replaced with the Hebrew letters alef (U+05D0)
 * through tav (U+05EA), and %20 is replaced with a space. Hex digits are
 * matched case-insensitively, so "%E4" and "%e4" decode to the same letter.
 * Every other escape sequence is left in the string unchanged.
 *
 * @param {string} str - Text containing percent-escaped bytes.
 * @returns {string} The text with the supported escapes decoded.
 */
function decode(str) {
    return str.replace(/%([0-9a-f]{2})/gi, function (match, hex) {
        var byteValue = parseInt(hex, 16);
        if (byteValue >= 0xE0 && byteValue <= 0xFA) {
            // The Hebrew letters are laid out contiguously in both encodings,
            // so a fixed offset maps the byte onto its code point.
            return String.fromCharCode(0x05D0 + (byteValue - 0xE0));
        }
        if (byteValue === 0x20) {
            return " ";
        }
        // Not an escape this function handles: keep it exactly as written.
        return match;
    });
}

2 个答案:

答案 0 :(得分:2)

原始网址未正确编码:

heb = "http://www.example.com/player.html?name=הבן"
"http://www.example.com/player.html?name=הבן"

enc = encodeURI(heb)
"http://www.example.com/player.html?name=%D7%94%D7%91%D7%9F"

decodeURI(enc)
"http://www.example.com/player.html?name=הבן"

答案 1 :(得分:1)

免责声明:我未对此代码进行任何全面测试

话虽如此,这段代码确实能实现你想要的效果,而且思路很直接。它借助 unicode.org 提供的转换表,执行以下步骤:

  1. 对字符串执行 unescape
  2. 将字符串拆分为单个字符组成的数组
  3. 取每个字符的码值(即 ISO-8859-8 的字节值),并转换为十六进制
  4. 通过转换表将该字节值替换为对应的 Unicode 码点
  5. 将数组重新拼接为字符串
    
    // Sample URL from the question; its name parameter holds three percent-escaped bytes.
    var url = "http://www.example.com/player.html?name=%e4%e1%ef";
    // The escaped value of the name parameter on its own, used as the conversion input below.
    var string = '%e4%e1%ef';
    
    /**
     * Lookup table from ISO-8859-8 byte values to Unicode code points.
     *
     * Keys are byte values written as '0xNN' and values are code points
     * written as '0xNNNN', both as upper-case hexadecimal strings.
     *
     * Layout of the table:
     *   0x00-0xBE  map to the same code point, except that 0xA1 has no entry,
     *              0xAA is MULTIPLICATION SIGN (U+00D7) and 0xBA is
     *              DIVISION SIGN (U+00F7)
     *   0xDF       DOUBLE LOW LINE (U+2017)
     *   0xE0-0xFA  HEBREW LETTER ALEF (U+05D0) through HEBREW LETTER TAV (U+05EA)
     *   0xFD       LEFT-TO-RIGHT MARK (U+200E)
     *   0xFE       RIGHT-TO-LEFT MARK (U+200F)
     * All other byte values have no entry.
     */
    var charEncodings = (function () {
      var table = {};

      // Upper-case hexadecimal rendering of value, left-padded with zeros to width digits.
      function toHex(value, width) {
        var digits = value.toString(16).toUpperCase();
        while (digits.length < width) {
          digits = '0' + digits;
        }
        return digits;
      }

      // Record one byte -> code point pair in the table's string format.
      function addEntry(byteValue, codePoint) {
        table['0x' + toHex(byteValue, 2)] = '0x' + toHex(codePoint, 4);
      }

      var byteValue;

      for (byteValue = 0x00; byteValue <= 0xBE; byteValue++) {
        if (byteValue === 0xA1) {
          continue;
        }
        if (byteValue === 0xAA) {
          addEntry(byteValue, 0x00D7);
        } else if (byteValue === 0xBA) {
          addEntry(byteValue, 0x00F7);
        } else {
          addEntry(byteValue, byteValue);
        }
      }

      addEntry(0xDF, 0x2017);

      for (byteValue = 0xE0; byteValue <= 0xFA; byteValue++) {
        addEntry(byteValue, 0x05D0 + (byteValue - 0xE0));
      }

      addEntry(0xFD, 0x200E);
      addEntry(0xFE, 0x200F);

      return table;
    }());
    
    // Convert the sample value; the function writes the decoded text to the console.
    utf8_to_iso_8859_8(string);
    /**
     * Decode a percent-escaped ISO-8859-8 string into Unicode text.
     *
     * NOTE: despite the name, the conversion direction is ISO-8859-8 byte
     * values -> Unicode characters, using the charEncodings lookup table.
     *
     * Each %XX escape is turned into a character whose code is that byte
     * value, and every character is then looked up in charEncodings under the
     * key '0xNN' (two upper-case hex digits). Characters with no table entry,
     * including anything above 0xFF, are passed through unchanged.
     *
     * @param {string} s - Percent-escaped text, e.g. '%e4%e1%ef'.
     * @returns {string} The decoded text, which is also logged to the console.
     */
    function utf8_to_iso_8859_8(s) {
      // unescape() is used rather than decodeURIComponent() because the input
      // bytes are not UTF-8 and decodeURIComponent() rejects them as malformed.
      var rawChars = unescape(s);
      var decoded = '';

      for (var i = 0; i < rawChars.length; i++) {
        var code = rawChars.charCodeAt(i);
        // Table keys always carry two hex digits, so pad values below 0x10.
        var key = '0x' + (code < 0x10 ? '0' : '') + code.toString(16).toUpperCase();

        if (Object.prototype.hasOwnProperty.call(charEncodings, key)) {
          // Table values are hex strings such as '0x05D4'; convert to a number first.
          decoded += String.fromCharCode(Number(charEncodings[key]));
        } else {
          // No mapping for this character: keep it instead of emitting NUL.
          decoded += rawChars.charAt(i);
        }
      }

      console.log(decoded);
      return decoded;
    }