Question

我的C程序粘贴在下面。在bash中，程序打印“char is”，Ω 没有打印。我的语言环境都是en_US.utf8。

#include <stdio.h>
#include <wchar.h>
#include <stdlib.h>

/* Reproduction from the question: attempts to print the wide character
 * Ω with wprintf; the asker reports that only "char is" appears. */
int main() {
   int r;   /* receives wprintf's return value; it is never inspected */
   wchar_t myChar1 = L'Ω';
   /* NOTE(review): the format uses %c (not %lc) for a wchar_t argument, and
    * setlocale() is never called before this point. The first answer in this
    * thread names both as the cause of the missing Ω. */
   r = wprintf(L"char is %c\n", myChar1);
}

Answer 1

这非常有趣。显然，编译器将omega从UTF-8转换为UNICODE，但不知何故，libc将它弄乱了。

首先：%c 格式说明符需要的是 char（即使在 wprintf 版本中也是如此），因此您必须指定 %lc（相应地，字符串要用 %ls）。

其次，如果您像这样运行代码，语言环境会被设置为 "C"（它不会自动从环境中获取）。您必须使用空字符串调用 setlocale（例如 setlocale(LC_ALL, "")），以便从环境中获取区域设置，这样 libc 才能正常工作。

Answer 2

除了建议修复LIBC的答案之外，您可以这样做：

#include <stdio.h>
#include <wchar.h>
#include <stdlib.h>

/*
 * Encode a single Unicode code point as a NUL-terminated UTF-8 string.
 *
 * c: the code point to encode, passed as a wchar_t.
 *
 * Returns a pointer to an internal static buffer holding 1 to 4 UTF-8 bytes
 * followed by '\0'. The result is the empty string when c is 0, or when c is
 * not a Unicode scalar value (negative, above U+10FFFF, or a surrogate in
 * U+D800..U+DFFF), since no valid UTF-8 sequence exists for those inputs.
 *
 * NOTE: *NOT* thread safe, not re-entrant: every call overwrites the same
 * static buffer, so copy the result before calling this function again.
 */
const char* unicode_to_utf8(wchar_t c)
{
    static unsigned char b_static[5];
    unsigned char *out = b_static;

    /* wchar_t may be a signed type. Converting to unsigned first maps any
     * negative input to a huge value that the range check below rejects,
     * instead of letting it fall into the single-byte branch. */
    const unsigned long cp = (unsigned long)c;

    if (cp > 0x10FFFFul || (cp >= 0xD800ul && cp <= 0xDFFFul)) {
        /* Not a Unicode scalar value: emit nothing (empty string). */
    } else if (cp < 0x80ul) {
        /* 7 bits: plain ASCII, one byte. */
        *out++ = (unsigned char)cp;
    } else if (cp < 0x800ul) {
        /* 11 bits: 110xxxxx 10xxxxxx */
        *out++ = (unsigned char)((cp >> 6) | 0xC0);
        *out++ = (unsigned char)((cp & 0x3F) | 0x80);
    } else if (cp < 0x10000ul) {
        /* 16 bits: 1110xxxx 10xxxxxx 10xxxxxx */
        *out++ = (unsigned char)((cp >> 12) | 0xE0);
        *out++ = (unsigned char)(((cp >> 6) & 0x3F) | 0x80);
        *out++ = (unsigned char)((cp & 0x3F) | 0x80);
    } else {
        /* 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        *out++ = (unsigned char)((cp >> 18) | 0xF0);
        *out++ = (unsigned char)(((cp >> 12) & 0x3F) | 0x80);
        *out++ = (unsigned char)(((cp >> 6) & 0x3F) | 0x80);
        *out++ = (unsigned char)((cp & 0x3F) | 0x80);
    }

    *out = '\0';
    /* Explicit cast: the buffer is unsigned char, the interface is char. */
    return (const char *)b_static;
}


/* Demo driver: encodes Ω to UTF-8 with unicode_to_utf8() and prints it
 * through the ordinary byte-oriented printf. Always exits with status 0. */
int main(void) {
    const wchar_t omega = L'Ω';
    const char *encoded = unicode_to_utf8(omega);

    /* The printf result is captured but deliberately not acted upon. */
    int written = printf("char is %s\n", encoded);
    (void)written;

    return 0;
}

Answer 3

在输出之前使用{glib，libiconv，ICU}将其转换为UTF-8。

将wchar打印到Linux控制台？

3 个答案: