我想在 C/C++ 程序中将字符串/字符转换为 \uXXXX 格式。假设我有一个字符 'A',我希望转换后打印出 \u0041(标准 Unicode 转义)。
第二个问题:我使用 Unix 命令行工具 printf 将 \uXXXX 形式的字符串打印为字符。我尝试了 "\u092b",但打印出的字符与我的字体文件中的字符不一致。谁能解释一下其中的原因?
答案 0 :(得分:0)
下面是一个使用标准 C++ 完成此转换的函数(不过根据 CharT 的不同,它可能依赖某些要求,而一些合法的实现定义行为未必满足这些要求)。
#include <codecvt>
#include <iomanip>
#include <iostream>
#include <locale>
#include <sstream>
#include <string>
namespace uescapes_detail {
// std::wstring_convert deletes the facet it owns, but std::codecvt declares its
// destructor protected. This thin wrapper exposes a public destructor so the
// standard facets can be handed to std::wstring_convert.
template<typename Facet>
struct deletable_facet : Facet
{
    using Facet::Facet; // inherit the facet's constructors
    ~deletable_facet() {}
};
} // namespace uescapes_detail

/// @brief Rewrites every code point of @p input as a universal-character-name escape.
///
/// Code points below U+10000 are written as "\u" followed by 4 lowercase hex
/// digits; all other code points are written as "\U" followed by 8 lowercase
/// hex digits.
///
/// @param input Text to escape. If CharT is char the bytes are taken as-is and
///              must already be UTF-8. If CharT is char16_t or char32_t the
///              conversion is UTF-16/UTF-32 -> UTF-8 (not all implementations
///              support this). For any other CharT the implementation-defined
///              narrow encoding is used, which only works here if that
///              encoding is UTF-8.
/// @return The escaped text, in the same string type as @p input.
/// @throws std::range_error if one of the std::wstring_convert conversions fails
///         (for example on malformed UTF-8).
///
/// NOTE(review): the result is formatted through a basic_stringstream<CharT>;
/// the standard only guarantees the required locale facets for char and
/// wchar_t, so other character types may not work in practice -- confirm on the
/// target implementation.
template<typename CharT,typename traits,typename allocator>
std::basic_string<CharT,traits,allocator>
to_uescapes(std::basic_string<CharT,traits,allocator> const &input)
{
    using uescapes_detail::deletable_facet;

    // CharT <-> char converter. If CharT = char then no conversion is done.
    std::wstring_convert<deletable_facet<std::codecvt<CharT,char,std::mbstate_t>>,CharT> convertA;
    // UTF-8 -> UTF-32 converter. Not all implementations support this yet.
    std::wstring_convert<deletable_facet<std::codecvt<char32_t,char,std::mbstate_t>>,char32_t> convertB;

    // Input encoding -> UTF-32 (assuming convertA produces a UTF-8 string).
    const std::u32string u32input = convertB.from_bytes(convertA.to_bytes(input));

    // The escape prefixes never change, so widen them once instead of once per code point.
    const auto short_prefix = convertA.from_bytes("\\u");
    const auto long_prefix = convertA.from_bytes("\\U");

    std::basic_stringstream<CharT,traits,allocator> ss;
    ss.fill('0');
    ss << std::hex;
    for (char32_t c : u32input) {
        // std::setw only applies to the next insertion, so it is set right
        // before the number and after the prefix has been written.
        if (c < U'\U00010000')
            ss << short_prefix << std::setw(4) << static_cast<unsigned long>(c);
        else
            ss << long_prefix << std::setw(8) << static_cast<unsigned long>(c);
    }
    return ss.str();
}
/// @brief Convenience overload for null-terminated character sequences.
///
/// Copies @p input into a std::basic_string<CharT> and forwards to the
/// std::basic_string overload of to_uescapes.
///
/// @param input Null-terminated text to escape; a null pointer is treated as
///              an empty string.
/// @return The escaped text as a std::basic_string<CharT>.
template<typename CharT>
std::basic_string<CharT>
to_uescapes(CharT const *input)
{
    // Constructing std::basic_string from a null pointer is undefined
    // behavior, so answer with an empty result instead.
    if (input == nullptr)
        return std::basic_string<CharT>();
    return to_uescapes(std::basic_string<CharT>(input));
}
// Demo: escape a UTF-8 literal that ends in a code point outside the BMP and
// print the escaped text followed by a newline.
int main()
{
    std::cout << to_uescapes(u8"Hello \U00010000") << '\n';
    return 0;
}
这应该打印:
\u0048\u0065\u006c\u006c\u006f\u0020\U00010000