在JavaScript中与Python的chr()对应

时间:2012-07-29 10:14:27

标签: javascript python node.js fromcharcode

JavaScript方法String.fromCharCode()在以下意义上与Python的unichr()等效:

print unichr(213) # prints Õ on the console 
console.log(String.fromCharCode(213)); // prints Õ on the console as well

但是,出于我的目的,我需要一个与Python函数chr()等效的JavaScript。是否有这样的JavaScript函数或使String.fromCharCode()表现得像chr()的方法?

也就是说,我需要一些能模仿以下Python代码的JavaScript:

print chr(213) # prints � on the console

2 个答案:

答案 0 :(得分:2)

原来你只是想在node.js中处理原始字节,已经有一个模块(Buffer)可以做到这一点。如果你是真正的高手,也可以只用javascript字符串来实现这些功能,但那样更难,而且效率低得多。

//Buffer.from copies the listed byte values into a new, fully initialised
//Buffer. The older `new Buffer(size)` constructor is deprecated in Node.js.
var lone = Buffer.from([213]);

//toString() decodes the bytes as UTF-8 by default; a lone 0xD5 is not a
//complete UTF-8 sequence, so it is shown as the replacement character.
console.log(lone.toString()); //�


//The three bytes E2 98 85 are the UTF-8 encoding of U+2605.
var star = Buffer.from([0xE2, 0x98, 0x85]);

console.log(star.toString()); //★

print chr(213) # prints � on the console

所以这会打印一个原始字节(0xD5),终端(很可能)按UTF-8来解释它,而它不是有效的UTF-8字节序列,因此显示为替换字符(�)。

UTF-8的解释与此无关,您很可能只想要原始字节。

要在javascript中创建原始字节,您可以使用Uint8Array:

//A one-element typed array holding the raw byte 213 (0xD5).
var a = new Uint8Array([213]);

然后,您可以选择性地将原始字节解释为utf-8:

//`a` holds the single byte 0xD5, which is not a complete UTF-8 sequence,
//so the decoder yields the replacement character U+FFFD.
console.log( utf8decode(a)); // "�"

//Decodes a sequence of UTF-8 bytes into a JavaScript string.
//
//  uint8array - Uint8Array (or any array-like of byte values 0..255).
//  returns    - the decoded string; '' for empty input.
//
//Error policy: every byte that cannot be decoded becomes one U+FFFD.
//  * A lead byte whose continuation bytes are missing or not of the form
//    10xxxxxx yields a single U+FFFD; decoding resumes at the next byte.
//  * A structurally complete sequence that is overlong, encodes a surrogate
//    (U+D800..U+DFFF) or exceeds U+10FFFF yields one U+FFFD per byte.
//
//Code points above the BMP are emitted as UTF-16 surrogate pairs, because
//String.fromCharCode only accepts 16-bit code units.
function utf8decode(uint8array) {
    var REPLACEMENT = 0xFFFD,
        //Code units are converted in slices so that very large inputs do not
        //exceed the engine's limit on the number of call arguments.
        CHUNK = 0x2000,
        units = [],
        len = uint8array.length,
        result = '',
        i = 0,
        byte, need, min, codePoint, j, wellFormed;

    //True when a byte exists at `index` and has the 10xxxxxx continuation form.
    function isContinuation(index) {
        return index < len && (uint8array[index] & 0xC0) === 0x80;
    }

    while (i < len) {
        byte = uint8array[i];

        //Single-byte (ASCII) fast path.
        if ((byte & 0x80) === 0x00) {
            units.push(byte);
            i += 1;
            continue;
        }

        //Classify the lead byte: number of continuation bytes required, the
        //smallest code point that legitimately needs that length, and the
        //payload bits carried by the lead byte itself.
        if ((byte & 0xE0) === 0xC0) {
            need = 1;
            min = 0x80;
            codePoint = byte & 0x1F;
        } else if ((byte & 0xF0) === 0xE0) {
            need = 2;
            min = 0x800;
            codePoint = byte & 0x0F;
        } else if ((byte & 0xF8) === 0xF0) {
            need = 3;
            min = 0x10000;
            codePoint = byte & 0x07;
        } else {
            //Stray continuation byte or a byte that can never start a sequence.
            need = 0;
        }

        wellFormed = need > 0;
        for (j = 1; wellFormed && j <= need; ++j) {
            if (isContinuation(i + j)) {
                codePoint = (codePoint << 6) | (uint8array[i + j] & 0x3F);
            } else {
                wellFormed = false;
            }
        }

        if (!wellFormed) {
            //Only the lead byte is consumed; the following bytes are examined
            //again on their own so that valid data after the error survives.
            units.push(REPLACEMENT);
            i += 1;
            continue;
        }

        if (codePoint < min || codePoint > 0x10FFFF ||
                (codePoint >= 0xD800 && codePoint <= 0xDFFF)) {
            //Overlong form, encoded surrogate, or beyond the Unicode range.
            for (j = 0; j <= need; ++j) {
                units.push(REPLACEMENT);
            }
        } else if (codePoint > 0xFFFF) {
            //Split a supplementary code point into a surrogate pair.
            codePoint -= 0x10000;
            units.push(0xD800 + (codePoint >> 10), 0xDC00 + (codePoint & 0x3FF));
        } else {
            units.push(codePoint);
        }
        i += need + 1;
    }

    for (i = 0; i < units.length; i += CHUNK) {
        result += String.fromCharCode.apply(String, units.slice(i, i + CHUNK));
    }
    return result;
}

你最有可能尝试做的事情与尝试将字节解释为utf8无关。

另一个例子:

//The black star U+2605 ★ is encoded in UTF-8 as the bytes E2 98 85:
var a = new Uint8Array([0xE2, 0x98, 0x85]);
utf8decode(a) === String.fromCharCode(0x2605); //true
utf8decode(a); // ★

在python 2.7(Ubuntu)中:

print chr(0xE2) + chr(0x98) + chr(0x85)
#prints ★

答案 1 :(得分:1)

如果你想让不在标准ASCII表中的每个数字都显示这个"框中的问号",那么这个小函数怎么样?

/**
 * Returns the character for an ASCII code, or the replacement character
 * U+FFFD for any value outside the ASCII table.
 *
 * @param {number} c - Character code; the ASCII range is 0..127 inclusive.
 * @returns {string} A one-character string.
 */
function chr(c) {
    //Written as a positive range test so that NaN and undefined, which fail
    //every comparison, also yield the replacement character.
    return (c >= 0 && c <= 127) ? String.fromCharCode(c) : '�';
}