我有一个
-- Lists every employee that belongs to the department(s) whose average
-- salary ranks first when departments are ordered by average salary ascending.
-- Departments tied on that average are all returned (DENSE_RANK keeps ties).
-- NOTE(review): NULL sort position is engine-dependent; a department whose
-- salaries are all NULL may rank first on engines that sort NULLs first.
WITH employee_with_dept_avg AS (
    -- One row per joined employee, carrying the average salary of its department.
    SELECT
        t1.empName,
        t2.salary,
        t2.department,
        AVG(t2.salary) OVER (PARTITION BY t2.department) AS avg_salary
    FROM table1 t1
    INNER JOIN table2 t2
        ON t1.empID = t2.empID
),
ranked_by_dept_avg AS (
    -- Rank rows by their department average; equal averages share a rank.
    SELECT
        d.empName,
        d.salary,
        d.department,
        DENSE_RANK() OVER (ORDER BY d.avg_salary) AS seqnum
    FROM employee_with_dept_avg d
)
SELECT
    empName,
    salary,
    department
FROM ranked_by_dept_avg
WHERE seqnum = 1;
元素数组,每个元素都存储一个非拉丁Unicode字符的代码点。如何在控制台上打印它们,或将它们以UTF-8编码的字符形式存储到文件中?我知道它们可能无法在控制台上正确呈现,但如果我在兼容的编辑器中打开文件,它们应该能正常显示。
我尝试过使用uint32_t和wprintf(L"%lc", UINT32_T_VARIABLE),但无济于事。
答案 0 :(得分:1)
您必须先选择正确的区域设置:
#include <locale.h>
setlocale(LC_ALL, "C.UTF-8");
或
setlocale(LC_ALL, "en_US.UTF-8");
然后使用printf或fprintf,并采用%lc格式:
printf("%lc", UINT32_T_VARIABLE);
这仅适用于小到足以放入wchar_t的Unicode代码点。对于更完整、更可移植的解决方案,您可能需要自己实现Unicode到UTF-8的转换,这并不困难。
答案 1 :(得分:0)
最好在可用时使用现有代码。
自己编写Unicode代码点到UTF-8的转换很简单,但很容易出错。这个答案经过2次修改才得以修正(感谢 @Jonathan Leffler 和 @chqrlie),因此建议对任何自行编写的解决方案进行严格测试。以下是经过轻度测试的代码,用于将代码点转换为字节数组。请注意,结果不是字符串。
// Encode a single Unicode code point as UTF-8.
//
// utf8      - output buffer; receives between 1 and 4 bytes on success and
//             is left untouched on failure. No terminator is appended.
// codepoint - the value to encode.
//
// Returns the number of bytes stored in utf8[], or 0 when the code point is
// a surrogate (0xD800..0xDFFF) or lies above 0x10FFFF.
unsigned Unicode_CodepointToUTF8(uint8_t *utf8, uint32_t codepoint) {
  // Lead-byte marker bits, indexed by the total sequence length.
  static const uint8_t lead_marker[5] = {0x00, 0x00, 0xC0, 0xE0, 0xF0};
  unsigned length;
  unsigned pos;

  // Step 1: decide how many bytes the encoding needs.
  if (codepoint < 0x80) {
    length = 1;
  } else if (codepoint < 0x800) {
    length = 2;
  } else if (codepoint < 0x10000) {
    // Surrogate halves are not encodable code points.
    if (codepoint >= 0xD800 && codepoint <= 0xDFFF) {
      return 0;
    }
    length = 3;
  } else if (codepoint < 0x110000) {
    length = 4;
  } else {
    return 0;
  }

  // Step 2: emit continuation bytes from the tail, six payload bits each.
  for (pos = length - 1; pos > 0; pos--) {
    utf8[pos] = (uint8_t)(0x80 | (codepoint & 0x3F));
    codepoint >>= 6;
  }

  // Step 3: the bits that remain go into the lead byte under its marker.
  utf8[0] = (uint8_t)(lead_marker[length] | codepoint);
  return length;
}
// Sample usage
uint32_t cp = foo();
uint8_t utf8[4];
unsigned len = Unicode_CodepointToUTF8(utf8, cp);
if (len == 0) Handle_BadCodePoint();
size_t y = fwrite(utf8, 1, len, stream_opened_in_binary_mode);