Bash替代(shell设置为UTF8):
输入:
in.json
$ file -I in.json
in.json: text/plain; charset=utf-8
{ “它-它”: “的Città”}
Bash命令我需要JS替代:
$ iconv -f utf8 -t latin1 in.json > out.json
out.json
$ file -I in.json
out.json: text/plain; charset=iso-8859-1
{ “它-它”: “CITT”}
当从输入类型=“文件”读取为base64时,Javascript在浏览器中看到in.json的内容(尽管内容类型和脚本编码设置为utf8):
{"it-it":"Città "}
Javascript在浏览器中看到out.json:
{"it-it":"Città"}
问题 - 如何以最原生的Javascript方式制作大多数现代浏览器转换此utf8字符串
({"it-it":"Città "} as latin1 and {"it-it":"Città"} as utf8)
到latin1字符串?
我更喜欢原生解决方案,或者最糟糕的情况是JQuery,请尽量不要使用npm +节点依赖地狱来解决它。
P.s。:我只需要支持最现代的浏览器,这适用于仅限管理员的页面。
答案 0 :(得分:0)
下面我创建了一个iso-8859-1
版本为CittÃ
的数组,然后使用TextDecoder对其进行解码。
因此,如果您可以获得JSON的二进制版本,则应该可以为您进行转换。
//CittÃ
var latinSource = new Uint8Array([67, 105, 116, 116, 195]);
var tc = new TextDecoder("iso-8859-1");
console.log(tc.decode(latinSource));
答案 1 :(得分:0)
对我来说,'new TextDecoder("iso-8859-1")' 不起作用...
1.
var latinSource = new Uint8Array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255]);
var tc = new TextDecoder("iso-8859-1");
console.log(tc.decode(latinSource)); //return windows-1252 string
我明白了,结果,它不是 latin1 字符串,因为它包含字符 '€'。
2.
//windows-1252
console.log('new TextDecoder("iso-8859-1")', new TextDecoder("iso-8859-1"));
// ---->
//new TextDecoder("iso-8859-1") {
// "encoding": "windows-1252",
// "fatal": false,
// "ignoreBOM": false,
// "decode": function decode() { [native code] }
//}
//Decode Latin1-string (iso-8859-1 encoded string) -> into Uint8Array
function Latin1ToUint8Array(iso_8859_1){
var uInt8Arr = new Uint8Array(iso_8859_1.length);
for(var i=0; i<iso_8859_1.length; i++){
uInt8Arr[i] = iso_8859_1.charCodeAt(i);
}
return uInt8Arr;
}
//encode Uint8Array -> into iso-8859-1 encoded string (latin1-string)
function Uint8ToLatin1Str(Uint8Arr){
var iso_8859_1_string = '';
for(var i=0; i<Uint8Arr.length; i++){iso_8859_1_string+= String.fromCharCode(Uint8Arr[i]);}
return iso_8859_1_string;
}
var latinSource = new Uint8Array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255]);
console.log( Uint8ToLatin1Str(latinSource) ); //valid latin1-string (iso-8859-1)
function Windows1252EncodeDecode(
cp1252 //string (to encode into bytes), or Uint8Array (to decode into string)
){
var replaceCharCodesForLatin1 = {
//_______________________________________________________________________
//|"windows-1252"| iso-8859-1 | //Unicode |
//|'character' | charcode, | //charcode(commented), |
//|______________|______________________|_______________________________|
'€' : 128, //8364,
'‚' : 130, //8218,
'ƒ' : 131, //402,
'„' : 132, //8222,
'…' : 133, //8230,
'†' : 134, //8224,
'‡' : 135, //8225,
'ˆ' : 136, //710,
'‰' : 137, //8240,
'Š' : 138, //352,
'‹' : 139, //8249,
'Œ' : 140, //338,
'Ž' : 142, //381,
'‘' : 145, //8216,
'’' : 146, //8217,
'“' : 147, //8220,
'”' : 148, //8221,
'•' : 149, //8226,
'–' : 150, //8211,
'—' : 151, //8212,
'˜' : 152, //732,
'™' : 153, //8482,
'š' : 154, //353,
'›' : 155, //8250,
'œ' : 156, //339,
'ž' : 158, //382,
'Ÿ' : 159, //376
};
if(typeof cp1252 === 'string'){ //if that was been string to encode to bytes
var resultUint8 = new Uint8Array(cp1252.length);
for(var i = 0; i<cp1252.length; i++){
var charCode = cp1252[i].charCodeAt(0);
resultUint8[i] = ((charCode>256) ? replaceCharCodesForLatin1[cp1252[i]] : charCode);
}
return resultUint8; //return Uint8Array
}else if(cp1252 instanceof Uint8Array){ //else if that was been Uint8Array to decode to string
var resultString = "";
for(var i = 0; i<cp1252.length; i++){
var charCode = (Object.keys(replaceCharCodesForLatin1).find(key => replaceCharCodesForLatin1[key] === cp1252[i]));
charCode = (typeof charCode === 'undefined') ? String.fromCharCode(cp1252[i]) : charCode;
resultString += charCode;
}
return resultString; //return Uint8Array
}
}
var latinSource = new Uint8Array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255]);
var windows1252 = new TextDecoder("iso-8859-1").decode(latinSource); //windows-1252 string on output
console.log('new TextDecoder("iso-8859-1").decode(latinSource)', (new TextDecoder("iso-8859-1").decode(latinSource)))
var bytesBack = Windows1252EncodeDecode(windows1252);
console.log('bytesBack', bytesBack.toString());
var Windows1252StringBack = Windows1252EncodeDecode(bytesBack)
console.log('string back', Windows1252StringBack);
console.log('Compare with TextDecoder', (Windows1252StringBack === windows1252 ));
function isLatin1String(str){return (str.match(/[^\u0000-\u00FF]/) === null);} //check is string "iso-8859-1"-encoded or not (true/false)
//Decode Latin1 or utf-8 string -> into Uint8Array
function StringToUint8Array(str){
if(!isLatin1String(str)){
return new TextEncoder("utf-8").encode(str); //encode to bytes as utf-8
}
//else, as ASCII-compatible latin1-string
var uInt8Arr = new Uint8Array(str.length);
for(var i=0; i<str.length; i++){
uInt8Arr[i] = str.charCodeAt(i);
}
return uInt8Arr;
}
//encode Uint8Array -> to latin1-string
function Uint8ToStr(Uint8Arr){
var iso_8859_1_string = '';
for(var i=0; i<Uint8Arr.length; i++){iso_8859_1_string+= String.fromCharCode(Uint8Arr[i]);}
return iso_8859_1_string;
}
function latin1ToUtf8(latin1str){
return new TextDecoder("utf-8").decode(StringToUint8Array(latin1str));
}
console.log('StringToUint8Array("CittÃ")', StringToUint8Array("CittÃ")); //Latin1
console.log('StringToUint8Array("Città€")', StringToUint8Array("Città€")); //utf-8
console.log('Uint8ToStr(StringToUint8Array("CittÃ"))', Uint8ToStr(StringToUint8Array("CittÃ"))); //latin1
console.log('Uint8ToStr(StringToUint8Array("Città"))', Uint8ToStr(StringToUint8Array("Città€"))); //utf-8
console.log('latin1ToUtf8(Uint8ToStr(StringToUint8Array("Città€")))', latin1ToUtf8(Uint8ToStr(StringToUint8Array("Città€")))); //utf-8