现在我已经能够获得字符的 Unicode 值,但在调用 CreateFont 之前,我必须先根据这个 Unicode 值得到它对应的字符集(charset):
/*
 * Prototype of CreateFont as quoted from MSDN. It returns an HFONT.
 * The fdwCharSet parameter is the character set identifier and takes one
 * of the predefined *_CHARSET values.
 */
HFONT CreateFont(
int nHeight, // height of font
int nWidth, // average character width
int nEscapement, // angle of escapement
int nOrientation, // base-line orientation angle
int fnWeight, // font weight
DWORD fdwItalic, // italic attribute option
DWORD fdwUnderline, // underline attribute option
DWORD fdwStrikeOut, // strikeout attribute option
DWORD fdwCharSet, // character set identifier
DWORD fdwOutputPrecision, // output precision
DWORD fdwClipPrecision, // clipping precision
DWORD fdwQuality, // output quality
DWORD fdwPitchAndFamily, // pitch and family
LPCTSTR lpszFace // typeface name
);
我在 MSDN 上查到了以下说明:
fdwCharSet
[in] Specifies the character set. The following values are predefined:
ANSI_CHARSET
BALTIC_CHARSET
CHINESEBIG5_CHARSET
DEFAULT_CHARSET
EASTEUROPE_CHARSET
GB2312_CHARSET
GREEK_CHARSET
HANGUL_CHARSET
MAC_CHARSET
OEM_CHARSET
RUSSIAN_CHARSET
SHIFTJIS_CHARSET
SYMBOL_CHARSET
TURKISH_CHARSET
VIETNAMESE_CHARSET
Korean language edition of Windows:
JOHAB_CHARSET
Middle East language edition of Windows:
ARABIC_CHARSET
HEBREW_CHARSET
Thai language edition of Windows:
THAI_CHARSET
The OEM_CHARSET value specifies a character set that is operating-system dependent.
Windows 95/98/Me: You can use the DEFAULT_CHARSET value to allow the name and size of a font to fully describe the logical font. If the specified font name does not exist, a font from any character set can be substituted for the specified font, so you should use DEFAULT_CHARSET sparingly to avoid unexpected results.
以下是我目前写的代码:
/*
 * Picks the GDI character-set identifier (the fdwCharSet argument of
 * CreateFont) to use for one Unicode value.
 *
 * word: the Unicode value to classify.
 *
 * Returns one of the *_CHARSET constants:
 *   - ANSI_CHARSET for ASCII and for any value no rule below matches;
 *   - when the ANSI code page is 932, 936, 949 or 950 and the value lies in
 *     one of the blocks listed in the first test, the charset that belongs
 *     to that code page;
 *   - otherwise the charset of the script block the value falls in.
 */
FX_INT32 CharSetFromUnicode(FX_WORD word)
{
    int nACP = GetACP();

    /* ASCII must never be routed to a CJK charset, otherwise plain Latin
     * text ends up rendered with a CJK font. */
    if (word < 0x80)
    {
        return ANSI_CHARSET;
    }

    switch (nACP)
    {
    case 932:
    case 936:
    case 949:
    case 950:
        /* These blocks are handled the same way for all four code pages:
         * the active code page, not the value, decides the charset.
         * NOTE(review): the two ranges above 0xFFFF can never match if
         * FX_WORD is a 16-bit type -- confirm against its definition. */
        if ((word >= 0x2E80 && word <= 0x2EFF) ||
            (word >= 0x3000 && word <= 0x303F) ||
            (word >= 0x3200 && word <= 0x32FF) ||
            (word >= 0x3300 && word <= 0x33FF) ||
            (word >= 0x3400 && word <= 0x4DB5) ||
            (word >= 0x4E00 && word <= 0x9FFF) ||
            (word >= 0xF900 && word <= 0xFAFF) ||
            (word >= 0xFE30 && word <= 0xFE4F) ||
            (word >= 0x20000 && word <= 0x2A6D6) ||
            (word >= 0x2F800 && word <= 0x2FA1F))
        {
            switch (nACP)
            {
            case 932:
                return SHIFTJIS_CHARSET;
            case 936:
                return GB2312_CHARSET;
            case 949:
                return HANGUL_CHARSET;
            case 950:
                /* Code page 950 is Traditional Chinese (Big5), not GB2312. */
                return CHINESEBIG5_CHARSET;
            default:
                break;
            }
        }
        break;
    default:
        break;
    }

    /* No code-page specific answer: classify by Unicode block. */
    if ((word >= 0x4E00 && word <= 0x9FA5) ||
        (word >= 0xE7C7 && word <= 0xE7F3) ||
        (word >= 0x3000 && word <= 0x303F) || /* CJK punctuation such as the ideographic comma and full stop */
        (word >= 0x2000 && word <= 0x206F))
    {
        return GB2312_CHARSET;
    }

    if ((word >= 0x3040 && word <= 0x309F) ||
        (word >= 0x30A0 && word <= 0x30FF) ||
        (word >= 0x31F0 && word <= 0x31FF) ||
        (word >= 0xFF00 && word <= 0xFFEF))
    {
        return SHIFTJIS_CHARSET;
    }

    if ((word >= 0xAC00 && word <= 0xD7AF) ||
        (word >= 0x1100 && word <= 0x11FF) ||
        (word >= 0x3130 && word <= 0x318F))
    {
        return HANGUL_CHARSET;
    }

    if (word >= 0x0E00 && word <= 0x0E7F)
    {
        return THAI_CHARSET;
    }

    if ((word >= 0x0370 && word <= 0x03FF) ||
        (word >= 0x1F00 && word <= 0x1FFF))
    {
        return GREEK_CHARSET;
    }

    if ((word >= 0x0600 && word <= 0x06FF) ||
        (word >= 0xFB50 && word <= 0xFEFC))
    {
        return ARABIC_CHARSET;
    }

    if (word >= 0x0590 && word <= 0x05FF)
    {
        return HEBREW_CHARSET;
    }

    if (word >= 0x0400 && word <= 0x04FF)
    {
        return RUSSIAN_CHARSET;
    }

    /* These six letters must be tested before the wider 0x0100-0x024F
     * range below, which would otherwise claim them. */
    if (word == 0x11E || word == 0x11F || word == 0x130 ||
        word == 0x131 || word == 0x15E || word == 0x15F)
    {
        return TURKISH_CHARSET;
    }

    if (word >= 0x0100 && word <= 0x024F)
    {
        return EASTEUROPE_CHARSET;
    }

    if (word >= 0x1E00 && word <= 0x1EFF)
    {
        return VIETNAMESE_CHARSET;
    }

    /* Nothing matched (e.g. Latin-1 letters): fall back to ANSI_CHARSET
     * instead of forcing a Simplified-Chinese font. */
    return ANSI_CHARSET;
}
……但这个函数无法正常工作。有人能帮我解决这个问题吗?
答案 0 :(得分:2)
一般来说,没有任何方法能够保证猜对编码。
不过在实践中,编码往往是可以猜出来的。例如,Mozilla 开发了一个非常出色的通用字符集检测库:uchardet。
Firefox 用它来自动猜测你所访问的任意网页的字符集(网页即使声明了编码,也并不总是正确的),而且在实践中它的效果似乎很好。