JavaScript中与Java的Charset / String类组合等效的字节数组解码方法

时间:2014-02-06 21:51:13

标签: java javascript

在Java中,如果我们知道字节数组的编码,我们可以解码它并获得相应的字符,如下所示 -

// Look up the Charset for the given encoding name (e.g. "windows-1253").
Charset charset = Charset.forName(encoding);
// Decode the raw bytes into a String using that charset.
String decodedString = new String(byteArray, charset);

如何在JavaScript中实现相同的结果?

假设我读了一个我知道是windows-1253编码的文件(希腊文)。为了正确显示文件内容,我必须解码文件中的字节。

如果我们不解码(或在不知道编码的文本编辑器中打开文件),我们可能会看到类似的内容 -

ÁõôÞ åßíáé ç åëëçíéêÞ.

但是当这个文本(即字节)被解码时,我们得到

Αυτή είναι η ελληνική.

2 个答案:

答案 0 :(得分:0)

JavaScript(ECMAScript)中的字符串始终采用UTF-16编码。

答案 1 :(得分:0)

希望这会对你有所帮助:

/**
 * Decodes a sequence of UTF-8 encoded bytes into a JavaScript string.
 *
 * Only UTF-8 is understood; bytes in a single-byte legacy code page will not
 * decode correctly with this function.
 *
 * Malformed input is handled as follows:
 *   - a truncated sequence, a lead byte followed by a non-continuation byte,
 *     or a stray continuation byte is replaced by U+FFFD, and decoding
 *     resumes at the offending byte;
 *   - a sequence whose value exceeds U+10FFFF throws a RangeError.
 * Overlong forms and UTF-8-encoded surrogates are accepted as-is, so
 * CESU-8 / "modified UTF-8" style input still decodes.
 *
 * @param {ArrayLike<number>} strBytes - Byte values. Unsigned (0..255) and
 *     signed (-128..-1, e.g. Int8Array) representations are both accepted.
 * @returns {string} The decoded text.
 * @throws {RangeError} If an element is not an integer byte value, or if a
 *     decoded code point is greater than 0x10FFFF.
 */
var getString = function (strBytes) {
    // Flush threshold: keeps the argument list handed to
    // String.fromCharCode.apply small enough for every engine.
    var MAX_SIZE = 0x4000;
    var REPLACEMENT_CHAR = 0xFFFD;

    var codeUnits = [];
    var result = '';
    var length = strBytes.length;
    var index = 0;

    // Returns the element at `position` as an unsigned byte (0..255).
    function byteAt(position) {
        var value = Number(strBytes[position]);
        if (!isFinite(value) || Math.floor(value) !== value || value < -128 || value > 255) {
            throw new RangeError('Invalid byte value: ' + value);
        }
        // Two's-complement view so that signed bytes map onto 0x80..0xFF.
        return value & 0xFF;
    }

    while (index < length) {
        var lead = byteAt(index++);
        var codePoint;
        var pending; // number of continuation bytes still expected

        if (lead < 0x80) {
            codePoint = lead;
            pending = 0;
        } else if (lead >= 0xF0) {
            // 0xF5..0xFF are not valid leads; they fall through to the range
            // check below and raise RangeError.
            codePoint = lead & 0x0F;
            pending = 3;
        } else if (lead >= 0xE0) {
            codePoint = lead & 0x0F;
            pending = 2;
        } else if (lead >= 0xC0) {
            codePoint = lead & 0x1F;
            pending = 1;
        } else {
            // 0x80..0xBF: continuation byte with no lead in front of it.
            codePoint = REPLACEMENT_CHAR;
            pending = 0;
        }

        while (pending > 0) {
            if (index >= length) {
                // Input ended in the middle of a sequence.
                codePoint = REPLACEMENT_CHAR;
                break;
            }
            var continuation = byteAt(index);
            if ((continuation & 0xC0) !== 0x80) {
                // Not a 10xxxxxx byte: leave it unconsumed so the outer loop
                // decodes it as the start of the next character.
                codePoint = REPLACEMENT_CHAR;
                break;
            }
            codePoint = (codePoint << 6) | (continuation & 0x3F);
            index++;
            pending--;
        }

        if (codePoint > 0x10FFFF) {
            throw new RangeError('Invalid code point: ' + codePoint);
        }

        if (codePoint <= 0xFFFF) {
            codeUnits.push(codePoint);
        } else {
            // Split a supplementary code point into a UTF-16 surrogate pair.
            codePoint -= 0x10000;
            codeUnits.push(0xD800 | (codePoint >> 10), 0xDC00 | (codePoint & 0x3FF));
        }

        if (codeUnits.length > MAX_SIZE) {
            result += String.fromCharCode.apply(null, codeUnits);
            codeUnits.length = 0;
        }
    }

    // Final flush happens unconditionally after the loop, so nothing is lost
    // regardless of where `index` ended up.
    if (codeUnits.length > 0) {
        result += String.fromCharCode.apply(null, codeUnits);
    }

    return result;
};

一切顺利!