FileReader.readAsBinaryString() 返回的是 UTF-8 编码的二进制字符串。如何把数据转换成一系列 Unicode 转义序列(\uxxxx)?
答案 0 :(得分:4)
FileReader.readAsBinaryString() 已被弃用,请改用 readAsArrayBuffer()。这样您就可以使用以下两种方法之一,将输入数据转换为转义的 Unicode 字符:
方法 1:使用 ArrayBuffer 和 Uint8Array 视图。下面的演示假设缓冲区已经加载完毕(这里用一些虚拟数据代替)。
// Demo input: two UTF-16 code units stored high byte first (big-endian format).
var buffer = new Uint8Array([0x20, 0xac, 0x2b, 0x08]);
var txt = "";
var pos;
// Consume the buffer one byte pair at a time: high byte, then low byte,
// each rendered as two hex digits behind a literal "\u" prefix.
for (pos = 0; pos < buffer.length; pos += 2) {
  txt += "\\u" + toString(buffer[pos]) + toString(buffer[pos + 1]);
}
// Render a value as lowercase hex, left-padding a single digit (v < 0x10)
// with one zero so that a byte always yields two digits.
function toString(v) {
  var hex = v.toString(16);
  if (hex.length === 1) {
    return "0" + hex;
  }
  return hex;
}
// Look the <output id="out"> element up explicitly instead of relying on the implicit
// window.out named-element global, and write the escape sequences as plain text.
document.getElementById("out").textContent = txt;
<output id="out"></output>
如果数据是小端序(little-endian,大多数主流系统的本机字节序),可以在这里改用 Uint16Array,每次只需把一个 16 位值(而不是两个字节)转换为字符串。或者使用 DataView,这样就可以按指定的字节序读取。这种写法可能会稍快一些,也可能不会(字节交换由浏览器完成,每次操作读取 16 位,而补零检查则合并到了 toString 方法中):
// Demo input: four bytes holding two big-endian 16-bit code units.
var data = new Uint8Array([0x20, 0xac, 0x2b, 0x08]);
// A DataView over the same ArrayBuffer lets each read state its byte order.
var view = new DataView(data.buffer);
var txt = "";
var pos;
// Walk the view one 16-bit value (two bytes) at a time and build the string.
for (pos = 0; pos < view.byteLength; pos += 2) {
  // second argument: false = big-endian, true = little-endian
  txt += "\\u" + toString(view.getUint16(pos, false));
}
// Render a 16-bit value as at least four lowercase hex digits, left-padded with zeros.
// Every short result is padded, including single-digit ones (e.g. 0x8 -> "0008"),
// so the caller always gets a well-formed four-digit \uXXXX escape.
function toString(v) {
  var s = v.toString(16);
  // "0000".slice(n) is exactly the missing zeros; it is "" once s has four or more digits.
  return "0000".slice(s.length) + s;
}
// Look the <output id="out"> element up explicitly instead of relying on the implicit
// window.out named-element global, and write the escape sequences as plain text.
document.getElementById("out").textContent = txt;
<output id="out"></output>
方法 2:使用较新的 TextDecoder API 来解码输入缓冲区——这里同样假设输入是一个 ArrayBuffer。
然后将 escape() 与 replace() 配合使用。这种转换方式很快,但 escape() 同样已被弃用。不过它短期内不会被移除,所以如果您愿意冒一点险,它也不失为一种选择——无论如何我还是把它列在这里:
// Decode the bytes as UTF-16. The "utf-16be" label selects big-endian;
// the plain "utf-16" label would decode as little-endian instead.
var td = new TextDecoder("utf-16be"),
    buffer = new Uint8Array([0x20, 0xac, 0x2b, 0x08]); // big-endian format

// assumes data loaded into an ArrayBuffer
var txt = td.decode(buffer);

// escape() is deprecated but won't go anywhere for a while. It emits "%uXXXX" for
// code units above 0xFF and "%XX" for the other characters it encodes, so both forms
// are mapped to a four-digit "\uXXXX" escape. Simply swapping "%" for "\" would turn
// "%20" into the invalid "\20". Characters escape() leaves alone (ASCII letters,
// digits and @*_+-./) stay literal.
out.innerHTML = escape(txt).replace(/%(u[0-9A-F]{4}|[0-9A-F]{2})/g, function (match, hex) {
  return hex.charAt(0) === "u" ? "\\" + hex : "\\u00" + hex;
});
// or use the same last step as in method 1, just showing an alternative way
//=> "\u20AC\u2B08"
<output id="out"></output>
注意:您可能已经注意到,我把字节序指定为大端序(big-endian)。从网络读取文件或二进制数据时,通常使用大端序(也称为网络字节序)。如果数据恰好是小端格式,则需要交换字节顺序:
对于方法 1,您可以这样做:
// Little-endian input stores the low byte first, so emit the second byte of
// each pair ahead of the first to get the code unit's digits in reading order.
for (; pos < buffer.length; pos += 2) {
  txt += "\\u" + toString(buffer[pos + 1]) + toString(buffer[pos]);
}
或者直接使用 Uint16Array,并配合上面提到的修改后的 toString 方法。
对于方法 2,您只需指定 utf-16 的小端序(little-endian)版本:
var td = new TextDecoder("utf-16"); // the plain "utf-16" label decodes as little-endian (contrast with "utf-16be")
请注意,TextDecoder 尚未稳定,也并非所有浏览器都支持。
答案 1 :(得分:3)
我想我可能已经为您找到了解决方案。
我在这个网站上发现了一个开源项目,它可以将文本转换为 Unicode 表示法。
我编辑了与项目相关的函数,并创建了一个小函数来处理打开的文件。
/*
Copyright (C) 2007 Richard Ishida ishida@w3.org
This program is free software; you can redistribute it and/or modify it under the terms
of the GNU General Public License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version as long as you point to
http://rishida.net/ in your code.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. http://www.gnu.org/licenses/gpl.html
*/
/**
 * Formats a number as an upper-case hexadecimal string (no padding, no prefix).
 *
 * @param {number} textString - the numeric value to format (parameter name kept as-is)
 * @returns {string} upper-case hex digits
 */
function dec2hex(textString) {
  return (textString + 0).toString(16).toUpperCase();
}

/**
 * Converts a string to a space-separated list of escapes, one entry per character.
 *
 * - BMP code units become "\u" followed by upper-case hex (zero-padded to four
 *   digits when `pad` is truthy).
 * - A valid surrogate pair is combined and emitted as the bare upper-case hex code
 *   point, without a "\u" prefix.
 *   NOTE(review): this is inconsistent with the BMP output; confirm the intended
 *   format for supplementary characters before changing it.
 * - An unpaired high surrogate is reported inline as an error message inside the
 *   returned string, including when it is the last code unit of the input.
 *
 * @param {string} textString - the string to convert
 * @param {string} [preserve] - 'ascii' keeps code units <= 127 as-is, 'latin1' keeps
 *   code units <= 255 as-is; any other value converts everything
 * @param {boolean} [pad] - if truthy, hex numbers shorter than four digits are zero-padded
 * @returns {string} the converted text, without a trailing separator
 */
function convertCharStr2Unicode(textString, preserve, pad) {
  var haut = 0; // pending high surrogate waiting for its low half; 0 when none
  var CPstring = '';
  var b;
  var cp; // declared locally: an undeclared assignment throws in strict mode / ES modules

  for (var i = 0; i < textString.length; i++) {
    b = textString.charCodeAt(i);

    if (haut !== 0) {
      if (0xDC00 <= b && b <= 0xDFFF) {
        // Combine the surrogate pair into a single code point.
        CPstring += dec2hex(0x10000 + ((haut - 0xD800) << 10) + (b - 0xDC00)) + ' ';
        haut = 0;
        continue;
      }
      // The high surrogate was not followed by a low one: report it, then
      // fall through and process the current code unit normally.
      CPstring += 'Error in convertChar2CP: surrogate out of range ' + dec2hex(haut) + '!';
      haut = 0;
    }

    if (0xD800 <= b && b <= 0xDBFF) {
      // Remember the high surrogate; the next iteration decides what to do with it.
      haut = b;
      continue;
    }

    if ((b <= 127 && preserve === 'ascii') || (b <= 255 && preserve === 'latin1')) {
      CPstring += textString.charAt(i) + ' ';
      continue;
    }

    cp = dec2hex(b);
    if (pad) {
      while (cp.length < 4) {
        cp = '0' + cp;
      }
    }
    CPstring += '\\u' + cp + ' ';
  }

  if (haut !== 0) {
    // The input ended on an unpaired high surrogate; report it instead of dropping it.
    CPstring += 'Error in convertChar2CP: surrogate out of range ' + dec2hex(haut) + '!';
  }

  // Strip the trailing separator only when there is one (an error message has none).
  if (CPstring.charAt(CPstring.length - 1) === ' ') {
    return CPstring.substring(0, CPstring.length - 1);
  }
  return CPstring;
}
演示(使用文件处理和文本输入):http://jsfiddle.net/howderek/btp6zd50/
答案 2 :(得分:2)
此函数会转义非 ASCII 字符,将其转换为 \uHHHH 形式的 Unicode 转义序列。
/**
 * Returns `str` with every code unit outside printable ASCII (32..126)
 * replaced by a lowercase, four-digit \uXXXX escape.
 *
 * @param {string} str - text to escape
 * @returns {string} the escaped text
 */
function ascii(str) {
  var pieces = [];
  for (var idx = 0; idx < str.length; idx += 1) {
    var code = str.charCodeAt(idx);
    var isPrintable = code >= 32 && code <= 126;
    if (isPrintable) {
      pieces.push(str.charAt(idx));
      continue;
    }
    // Left-pad the hex digits with zeros up to four characters.
    var hex = code.toString(16);
    pieces.push("\\u" + "0000".substr(hex.length) + hex);
  }
  return pieces.join("");
}
您可能希望保留文本中的格式字符,或者将它们编码为 \n、\r、\t。如果之后还需要编辑这段文本,这样做会很有帮助。
var x = "♫\n☆\n⚛\n☯\n⚓\n";
console.log(x);

/**
 * Escapes everything outside printable ASCII (32..126) as lowercase \uXXXX,
 * with optional special handling for TAB, LF and CR.
 *
 * @param {string} str - text to escape
 * @param {boolean} [formatting] - when exactly true, TAB/LF/CR are not \u-escaped
 * @param {boolean} [convert] - when exactly true (and formatting is true), TAB/LF/CR
 *   are written as the two-character sequences \t, \n, \r; otherwise they are kept raw
 * @returns {string} the escaped text
 */
function ascii(str, formatting, convert) {
  // Character code -> backslash sequence for the three formatting characters.
  var NAMED = { 9: "\\t", 10: "\\n", 13: "\\r" };
  var pieces = [];

  for (var idx = 0; idx < str.length; idx += 1) {
    var code = str.charCodeAt(idx);
    var ch = str.charAt(idx);
    var named = NAMED[code];

    if (code >= 32 && code <= 126) {
      // printable ASCII passes through untouched
      pieces.push(ch);
    } else if (formatting === true && named !== undefined) {
      pieces.push(convert === true ? named : ch);
    } else {
      // everything else becomes a zero-padded unicode escape
      var hex = code.toString(16);
      pieces.push("\\u" + "0000".substr(hex.length) + hex);
    }
  }

  return pieces.join("");
}

console.log(ascii(x));
console.log(ascii(x, true));
console.log(ascii(x, true, true));
输出:
♫
☆
⚛
☯
⚓
\u266b\u000a\u2606\u000a\u269b\u000a\u262f\u000a\u2693\u000a
\u266b
\u2606
\u269b
\u262f
\u2693
\u266b\n\u2606\n\u269b\n\u262f\n\u2693\n
答案 3 :(得分:0)
FileReader.readAsBinaryString() 不在 W3C File API 工作草案(working draft)中。请改用 Blob:
xhr.responseType = 'blob';
在下面的代码片段中,我获取了一个图像,但您可以获得任何二进制文件类型。
// Fetch a binary resource as a Blob and display it twice: inside the iframe and as
// an <img> appended to the display area. Each object URL is revoked once loaded.
var fileDisplayArea = document.getElementById('fileDisplayArea');
var testFrame = document.getElementById('testFrame');
window.URL = window.URL || window.webkitURL; // Take care of vendor prefixes.

var xhr = new XMLHttpRequest();
xhr.open('GET', 'http://i.imgur.com/Ar1SBEH.png', true);
xhr.responseType = 'blob'; // request the body as a Blob instead of text
xhr.onload = function(e) {
  if (this.status !== 200) {
    return;
  }
  var blob = this.response; // becomes type Blob
  console.log(blob);

  // Point the iframe at the blob and release the object URL after the frame has
  // loaded it; without this the URL (and the blob behind it) is never released.
  var frameUrl = window.URL.createObjectURL(blob);
  testFrame.onload = function() {
    window.URL.revokeObjectURL(frameUrl);
    testFrame.onload = null;
  };
  testFrame.src = frameUrl;

  var img = document.createElement('img');
  img.onload = function(e) {
    window.URL.revokeObjectURL(img.src); // Clean up after yourself.
  };
  img.src = window.URL.createObjectURL(blob);
  fileDisplayArea.appendChild(img);
};
xhr.send();
<iframe id="testFrame" src=""></iframe>
<div id="fileDisplayArea"></div>