我不能整齐地使用UTF16编码的字符串。这是解析UTF16编码的字符串的正确方法吗?这适用于ascii编码,但不适用于UTF16。我哪里错了?
我正在用此代码获取空HTML文档。
int rc = -1;
TidyDoc tdoc = tidyCreate();
TidyBuffer output = { 0 };
TidyBuffer errbuf = { 0 };
bool ok = tidyOptSetBool(tdoc, TidyXhtmlOut, yes); // Convert to XHTML
ok = tidyOptSetBool(tdoc, TidyForceOutput, yes);
rc = tidySetInCharEncoding(tdoc, "utf16le");
rc = tidySetOutCharEncoding(tdoc, "utf16le");
char* test = (char*)L"<html> \
<head><meta name = 'author' content = 'John Doe'>< / head> \
<body> \
<h1>My First Heading< / h1> \
<p>My first paragraph.< / p> \
< / body> \
< / html>";
if (rc >= 0)
{
rc = tidySetErrorBuffer(tdoc, &errbuf); // Capture diagnostics
}
if (rc >= 0)
{
rc = tidyParseString(tdoc, test); // Parse the input
}
if (rc >= 0)
{
//rc = tidyCleanAndRepair(tdoc); // Tidy it up!
}
if (extractText == true)
{
htmlText = new wstring();
tidyOptSetBool(tdoc, TidyOutputBOM, no);
//iterateNode(tdoc, tidyGetBody(tdoc), *htmlText);
}
rc = tidyRunDiagnostics(tdoc);
if (rc >= 0)
{
// we dont want utf16 BOM
ok = tidyOptSetBool(tdoc, TidyOutputBOM, no);
rc = tidySaveBuffer(tdoc, &output);
}
如果我更改为
rc = tidySetInCharEncoding(tdoc, "ascii");
rc = tidySetOutCharEncoding(tdoc, "utf16le");
char* test = "<html>...
有效