我想对文本进行反转义(unescape),例如在 C 中把 `&#92;` 映射为 `\`。有没有人知道一个好用的库?
答案 0 :(得分:36)
今天我有空闲时间从头开始编写解码器:entities.c,entities.h。
唯一具有外部链接(external linkage)的函数是
/*
 * Decodes HTML entities, as described by the accompanying answer text:
 *   - src == NULL: the string in dest is decoded in place.
 *   - src != NULL: the decoded string is written to dest, which should point
 *     to a buffer of at least strlen(src) + 1 chars; src is left unchanged.
 * Returns the length of the decoded string.
 */
size_t decode_html_entities_utf8(char *dest, const char *src);
如果 `src` 是空指针,则字符串取自 `dest`,即实体会被就地解码。否则,解码后的字符串将写入 `dest`(它应指向一个足以容纳 `strlen(src) + 1` 个字符的缓冲区),而 `src` 保持不变。
该函数将返回已解码字符串的长度。
请注意,我没有进行任何广泛的测试,因此很有可能出现错误......
答案 1 :(得分:1)
如果想参考另一个用 C 解码这些 HTML 实体的开源实现,可以看看命令行工具 uni2ascii / ascii2uni。相关文件是用于实体查找的 enttbl.{c,h},以及把 UTF-32 向下转换为 UTF-8 的 putu8.c。
答案 2 :(得分:0)
我自己写了一份 unescape 代码;非常简单,但能用:pn_util.c
答案 3 :(得分:0)
功能描述:将特殊的HTML实体转换回字符。 需要进行一些修改以满足您的要求。
/*
 * Capacity of the static result buffer. Normally supplied by the project;
 * this guarded fallback only keeps the snippet compilable on its own.
 */
#ifndef TITLE_SIZE
#define TITLE_SIZE 1024
#endif

/*
 * Converts the special HTML entities &lt; &gt; &amp; &quot; &#39; &#039;
 * and &apos; back to the characters they stand for.
 *
 * Parameters:
 *   encodedHtmlSpecialEntities - NUL-terminated input string (not modified,
 *                                unless it aliases the result buffer).
 *
 * Returns:
 *   - NULL if the input is NULL.
 *   - The input pointer itself if the input contains no '&'.
 *   - Otherwise a pointer to an internal static buffer holding the decoded
 *     text. The buffer is overwritten by the next call, and output longer
 *     than TITLE_SIZE - 1 characters is truncated.
 *
 * The input is decoded in a single pass, so "&amp;lt;" yields "&lt;" and is
 * not decoded a second time. An '&' that does not start a known entity is
 * copied through unchanged.
 */
char* HtmlSpecialChars_Decode(char* encodedHtmlSpecialEntities)
{
    static char decodedHtmlSpecialChars[TITLE_SIZE];

    /* This mapping table can be extended if necessary. */
    static const struct {
        const char* encodedEntity;
        char decodedChar;
    } entityToChars[] = {
        {"&lt;", '<'},
        {"&gt;", '>'},
        {"&amp;", '&'},
        {"&quot;", '"'},
        {"&#39;", '\''},
        {"&#039;", '\''},
        {"&apos;", '\''},
    };
    const size_t entityCount = sizeof(entityToChars) / sizeof(entityToChars[0]);
    const size_t capacity = sizeof(decodedHtmlSpecialChars) - 1;
    const char* in = encodedHtmlSpecialEntities;
    size_t out = 0;

    if (encodedHtmlSpecialEntities == NULL)
        return NULL;

    /* Nothing to decode: hand the caller's string straight back. */
    if (strchr(encodedHtmlSpecialEntities, '&') == NULL)
        return encodedHtmlSpecialEntities;

    /*
     * The write index never overtakes the read pointer (every step consumes
     * at least as many bytes as it produces), so this is safe even when the
     * caller passes a previously returned result back in.
     */
    while (*in != '\0' && out < capacity)
    {
        char ch = *in;
        size_t consumed = 1;

        if (ch == '&')
        {
            /* Potential entity: look for a table entry that matches here. */
            for (size_t j = 0; j < entityCount; j++)
            {
                const size_t len = strlen(entityToChars[j].encodedEntity);
                if (strncmp(in, entityToChars[j].encodedEntity, len) == 0)
                {
                    ch = entityToChars[j].decodedChar;
                    consumed = len;
                    break;
                }
            }
        }

        decodedHtmlSpecialChars[out++] = ch;
        in += consumed;
    }
    decodedHtmlSpecialChars[out] = '\0';

    return decodedHtmlSpecialChars;
}
答案 4 :(得分:0)
/// Decodes the basic HTML entities (&lt; &gt; &amp; &quot; &#39; &apos;) in
/// @p txt and returns the result.
///
/// The text is decoded in a single pass, so "&amp;lt;" becomes "&lt;".
/// An '&' that does not start a known entity is copied through unchanged,
/// as is all text before, between and after the entities.
QString UNESC(const QString &txt) {
    static const QChar AMP = '&', SCL = ';';
    static const QMap<QString, QString> dec = {
        {"&lt;", "<"}, {"&gt;", ">"},
        {"&amp;", "&"}, {"&quot;", "\""}, {"&#39;", "'"}, {"&apos;", "'"} };

    if (!txt.contains(AMP)) { return txt; }

    QStringList bld;
    int bgn = 0;  // start of the text not yet copied to bld
    for (;;) {
        const int pos = txt.indexOf(AMP, bgn);
        if (pos == -1) { break; }

        // Literal text in front of the '&'.
        bld << txt.mid(bgn, pos - bgn);
        bgn = pos;

        const int end = txt.indexOf(SCL, pos);
        if (end == -1) {
            // No ';' anywhere ahead: nothing further can be an entity.
            break;
        }

        // value() does not insert on a miss, unlike operator[].
        const QString val = dec.value(txt.mid(pos, end - pos + 1));
        if (val.isEmpty()) {
            // Unknown entity: keep the '&' and resume scanning right after it.
            bld << QString(AMP);
            bgn = pos + 1;
        } else {
            bld << val;
            bgn = end + 1;
        }
    }

    // Whatever follows the last entity (or the last unterminated '&').
    bld << txt.mid(bgn);
    return bld.join(QString());
}// UNESC