如何从一段HTML源代码中获取字体颜色?

时间:2011-09-13 13:05:07

标签: c++ html visual-c++ mshtml

我有一个像这样的HTML源代码:

<FONT color=#5a6571>Beverly Mitchell</FONT> <FONT color=#5a6571>Shawnee Smith</FONT> <FONT color=#5a6571>Glenn Plummer</FONT> <NOBR>more &gt;&gt;</NOBR>

我试图检索“颜色”值,如下所示:

// Builds an in-memory MSHTML document from the HTML text held in varSrc1 and
// passes the document's <body> DOM node to ProcessDomNodeSmartWrapper.
// varSrc1, ProcessDomNodeSmartWrapper and ProcTgtTagStrVec are declared outside
// this snippet.
MSHTML::IHTMLDocument2Ptr htmDoc1 = NULL;
// One-element SAFEARRAY of VARIANTs (lower bound 0); it is the argument given to write() below.
SAFEARRAY *psaStrings1 = SafeArrayCreateVector(VT_VARIANT, 0, 1);
// NOTE(review): the HRESULT returned by CoCreateInstance is discarded, so a
// failed creation is not detected before htmDoc1 is used.
CoCreateInstance(CLSID_HTMLDocument, NULL, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, (void**) &htmDoc1);

// Fill the single array slot with a BSTR copy of the source HTML.
// NOTE(review): hr from SafeArrayAccessData is not checked before param1 is dereferenced.
VARIANT *param1 = NULL;
HRESULT hr = SafeArrayAccessData(psaStrings1, (LPVOID*)&param1);
param1->vt = VT_BSTR;
param1->bstrVal = SysAllocString(varSrc1.bstrVal);

// Release the data lock, then write the HTML into the document.
// NOTE(review): no SafeArrayDestroy(psaStrings1) appears in this snippet —
// confirm the array is released elsewhere.
hr = SafeArrayUnaccessData(psaStrings1);
hr = htmDoc1->write(psaStrings1);

MSHTML::IHTMLElementPtr pElemBody1 = NULL;
MSHTML::IHTMLDOMNodePtr pHTMLBodyDOMNode1 =NULL;

// Fetch the body element and, if it exposes IHTMLDOMNode, process it.
hr = htmDoc1->get_body(&pElemBody1);
if(SUCCEEDED(hr))
{
    hr = pElemBody1->QueryInterface(IID_IHTMLDOMNode,(void**)&pHTMLBodyDOMNode1);
    if(SUCCEEDED(hr))
    {
        ProcessDomNodeSmartWrapper(pHTMLBodyDOMNode1, ProcTgtTagStrVec);
    }
}    

// Walks every element of the document (htmDoc1->get_all) and tries to record
// each element's style color as a key in hmStyles1 (declared outside this snippet).
long lLength = 0;
MSHTML::IHTMLElementCollectionPtr pElemColl1 = NULL;
MSHTML::IHTMLElementPtr pChElem1 = NULL;
MSHTML::IHTMLStylePtr pStyle1 = NULL;
IDispatchPtr ppvdisp1 = NULL;

// NOTE(review): hr from get_all is overwritten by get_length without being
// checked, and pElemColl1 is not tested for NULL before use.
hr = htmDoc1->get_all(&pElemColl1);
hr = pElemColl1->get_length(&lLength);
for(long i = 0; i < lLength; i++)
{
    // item() is called with the loop counter as both the name and the index argument.
    _variant_t name(i);
    _variant_t index(i);

    ppvdisp1 = pElemColl1->item(name, index);
    // NOTE(review): hr is not updated by the item() call above; the value
    // tested here is left over from an earlier call.
    if(ppvdisp1 && SUCCEEDED(hr))
    {
        hr = ppvdisp1->QueryInterface(IID_IHTMLElement, (void **)&pChElem1);

        if(pChElem1 && SUCCEEDED(hr))
        {
            BSTR bstrTagName = NULL;

            // The tag name is fetched and later freed, but not otherwise used in this snippet.
            pChElem1->get_tagName(&bstrTagName);
            hr = pChElem1->get_style(&pStyle1);
            if(pStyle1 && SUCCEEDED(hr))
            {
                _variant_t varFtCol;

                hr = pStyle1->get_color(&varFtCol);
                // NOTE(review): this is an assignment, not a comparison. `&&`
                // binds tighter than `=`, so hr receives the result of
                // (S_OK && varFtCol); with S_OK defined as 0 in the Windows SDK
                // that is always false and the body below never runs.
                // `hr == S_OK && ...` was probably intended.
                if(hr = S_OK && varFtCol)
                {
                    // Store the color string as a key mapped to L"FontColor".
                    hmStyles1[wstring(varFtCol.bstrVal)] = L"FontColor";
                }
            }
            if(bstrTagName)
               SysFreeString(bstrTagName);
        } // if pChElem1 && SUCCEEDED(hr)
    }// if ppvdisp1 && SUCCEEDED(hr)
}// for

但我始终无法获得“颜色”值——调试时,varFtCol.bstrVal是一个无效指针(空指针)。以下是调试时varFtCol显示的内容:

-       varFtCol    {???}   _variant_t
-       tagVARIANT  BSTR = 0x00000000  tagVARIANT
        vt  8   unsigned short
-       BSTR    0x00000000     wchar_t *
            CXX0030: Error: expression cannot be evaluated

#5a6571是十六进制颜色,表示RGB值为(90,101,113)。

如何获取此颜色信息?

2 个答案:

答案 0 :(得分:1)

根据MSDN文档,IHTMLStyle::get_color返回的VARIANT中既可能是BSTR,也可能是整数值。您是否尝试过将varFtCol转换为整数值并检查结果?

// Read the color through _variant_t's conversion operator rather than the raw VARIANT union member.
const int colorValue = static_cast<int>(varFtCol);

作为建议,在使用_variant_t时,通常最好使用内置的转换操作符,而不是直接访问union本身的成员。

答案 1 :(得分:1)

您不应该通过pChElem1获取样式(style),因为在您的HTML中,颜色并不是样式的一部分,而是FONT元素的一个属性(attribute)。

相反,您必须调用 pChElem1->getAttribute("color", ...)

这将返回#5a6571

以下代码(用于将上述十六进制颜色字符串转换为COLORREF)基于MFC编写。如果您不使用MFC,也可以很容易地将其改写为普通的Win32代码。

// Converts a hexadecimal color string such as "#5a6571" or "5a6571" into a
// COLORREF.
//
// Leading and trailing '#' characters are stripped first. If exactly six
// characters remain they are read as three two-character base-16 fields
// (red, green, blue). Any other length yields RGB(0,0,0).
COLORREF GetColorFromHexString( CString szColor )
{
    // szColor is passed by value, so trimming it here does not affect the caller.
    szColor.TrimLeft(_T('#'));
    szColor.TrimRight(_T('#'));

    // Anything that is not of the form RRGGBB maps to black.
    if (szColor.GetLength() != 6)
        return RGB(0, 0, 0);

    // channel[0] = red, channel[1] = green, channel[2] = blue.
    long channel[3] = { 0, 0, 0 };
    for (int i = 0; i < 3; ++i) {
        TCHAR *parseEnd = NULL;
        const CString hexPair = szColor.Mid(i * 2, 2);
        channel[i] = _tcstol(LPCTSTR(hexPair), &parseEnd, 16);
    }

    return RGB(channel[0], channel[1], channel[2]);
}