在Java中,如果我们知道字节数组的编码,我们可以解码它并获得相应的字符,如下所示 -
// Look up the Charset object that corresponds to the known encoding name.
Charset charset = Charset.forName(encoding);
// Decode the raw bytes into a String using that charset.
String decodedString = new String(byteArray, charset);
如何在JavaScript中实现相同的结果?
假设我读取了一个已知采用 windows-1253 编码(希腊文)的文件。为了正确显示文件内容,我必须对文件中的字节进行解码。
如果我们不解码(或在不知道编码的文本编辑器中打开文件),我们可能会看到类似的内容 -
ÁõôÞ åßíáé ç åëëçíéêÞ.
但是当这个文本(即字节)被解码时,我们得到
Αυτή είναι η ελληνική.
答案 0 :(得分:0)
JavaScript 字符串始终采用 UTF-16 编码(参见 ECMAScript 规范)。
答案 1 :(得分:0)
希望这会对你有所帮助:
/**
 * Decodes a sequence of UTF-8 encoded bytes into a JavaScript string.
 *
 * Only UTF-8 is handled: the lead byte's high bits select a 1-, 2-, 3- or
 * 4-byte sequence. Single-byte code pages such as windows-1253 are NOT
 * decoded by this function.
 *
 * If the input ends in the middle of a multi-byte sequence, the incomplete
 * sequence is replaced by a single U+FFFD (REPLACEMENT CHARACTER) and
 * everything decoded before it is still returned.
 *
 * @param {ArrayLike<number>} strBytes - Byte values (array or typed array).
 * @returns {string} The decoded text.
 * @throws {RangeError} If a decoded value is not an integer in the range
 *     0..0x10FFFF (for example when an element is not numeric).
 */
var getString = function (strBytes) {
  // Upper bound on buffered code units before they are flushed into the
  // result, which keeps the argument list of fromCharCode.apply bounded.
  var MAX_SIZE = 0x4000;
  var REPLACEMENT_CHAR = 0xFFFD;

  var codeUnits = [];
  var result = '';
  var length = strBytes.length;
  var index = 0;

  while (index < length) {
    var codePoint = Number(strBytes[index]);
    var continuationCount = 0;
    index += 1;

    // Anything outside 7-bit ASCII is a lead byte; strip its marker bits and
    // note how many continuation bytes follow. The widest mask is tested first
    // because the narrower masks also match the wider lead bytes.
    if (codePoint !== (codePoint & 0x7F)) {
      if (0xF0 === (codePoint & 0xF0)) {
        codePoint ^= 0xF0;
        continuationCount = 3;
      } else if (0xE0 === (codePoint & 0xE0)) {
        codePoint ^= 0xE0;
        continuationCount = 2;
      } else if (0xC0 === (codePoint & 0xC0)) {
        codePoint ^= 0xC0;
        continuationCount = 1;
      }
    }

    // Truncated trailing sequence: reading past the end would mix `undefined`
    // into the code point, so substitute U+FFFD and stop decoding.
    if (index + continuationCount > length) {
      codeUnits.push(REPLACEMENT_CHAR);
      break;
    }

    // Each continuation byte contributes its low 6 bits (0x80 marker removed).
    for (var i = 0; i < continuationCount; i += 1) {
      codePoint = (codePoint << 6) | (strBytes[index] ^ 0x80);
      index += 1;
    }

    if (!isFinite(codePoint) || codePoint < 0 || codePoint > 0x10FFFF || Math.floor(codePoint) !== codePoint) {
      throw new RangeError('Invalid code point: ' + codePoint);
    }

    if (codePoint <= 0xFFFF) {
      codeUnits.push(codePoint);
    } else {
      // Supplementary-plane code points are emitted as a UTF-16 surrogate pair.
      codePoint -= 0x10000;
      codeUnits.push((codePoint >> 10) | 0xD800, (codePoint % 0x400) | 0xDC00);
    }

    if (codeUnits.length > MAX_SIZE) {
      result += String.fromCharCode.apply(null, codeUnits);
      codeUnits.length = 0;
    }
  }

  // Always flush what is left; the loop position cannot be relied on to
  // detect the final element when the input ends with a partial sequence.
  result += String.fromCharCode.apply(null, codeUnits);
  return result;
};
一切顺利!