如何检测InnerHtml何时混合使用文本和嵌套子元素?

时间:2013-08-04 06:00:51

标签: c# dom

在 C# 中，我有两个函数用于遍历 HtmlDocument 中的 HtmlElement。我遇到的问题是：当遇到一个同时包含文本和子元素的元素时，该元素自身的文本部分会被忽略。例如，遇到如下节点时：<div>hello1<div>hello2</div>hello3</div>。

我的函数只会取到 hello2，而 hello1 和 hello3 被忽略。我希望在不把 HtmlElement 转换为 XElement 的前提下解决这个问题。有没有办法获取 hello1 和 hello3？

/// <summary>
/// Flattens the element tree under the document's first <c>html</c> element into a
/// single <see cref="XElement"/> named <paramref name="rootname"/>, with one child
/// element named <paramref name="rowname"/> per collected leaf element.
/// </summary>
/// <param name="htmlDoc">Document whose elements are walked.</param>
/// <param name="rootname">Name of the returned container element.</param>
/// <param name="rowname">Name given to every row element added to the container.</param>
/// <returns>
/// The container element; it has no children when nothing qualified or when the
/// document has no <c>html</c> element.
/// </returns>
private static XElement FlattenChildren(this HtmlDocument htmlDoc, string rootname, string rowname)
    {
        XElement result = new XElement(rootname);

        HtmlElement htmlRoot = htmlDoc.GetElementsByTagName("html")[0];
        if (htmlRoot == null)
        {
            // No <html> element was found; return the empty container instead of
            // failing on the Children access below.
            return result;
        }

        // The per-element handling is the same at the root as at any other depth,
        // so the recursive helper does all of the work.
        List<XElement> rows = new List<XElement>();
        FlattenChildrenIter(htmlRoot, rowname, ref rows);

        // XElement.Add expands an enumerable argument and appends each item in order.
        result.Add(rows);
        return result;
    }

/// <summary>
/// Recursively walks the children of <paramref name="p"/> and appends one
/// <see cref="XElement"/> named <paramref name="rowname"/> to
/// <paramref name="resultList"/> for every leaf element that passes the filters.
/// </summary>
/// <remarks>
/// Only elements without child elements produce a row. Text that sits directly inside
/// an element that also has child elements (for example <c>hello1</c> and <c>hello3</c>
/// in <c>&lt;div&gt;hello1&lt;div&gt;hello2&lt;/div&gt;hello3&lt;/div&gt;</c>) is never
/// read, because such an element is only recursed into.
/// </remarks>
/// <param name="p">Element whose children are visited.</param>
/// <param name="rowname">Name given to every row element that is created.</param>
/// <param name="resultList">List that receives the rows, in document order.</param>
private static void FlattenChildrenIter(HtmlElement p, string rowname, ref List<XElement> resultList)
    {
        foreach (HtmlElement elem in p.Children)
        {
            if (elem.Children.Count != 0)
            {
                // Not a leaf: descend. The element's own text is not captured here.
                FlattenChildrenIter(elem, rowname, ref resultList);
                continue;
            }

            if (elem.IgnoreTag() || !elem.HasQualifiedValue())
            {
                continue;
            }

            string gr = elem.GetAttribute("gr");
            string gs = elem.GetAttribute("gs");
            if (string.IsNullOrEmpty(gr) || string.IsNullOrEmpty(gs))
            {
                // Both attributes are required for a row to be emitted.
                continue;
            }

            XElement row = new XElement(rowname);
            row.SetAttributeValue("gr", gr);
            row.SetAttributeValue("gs", gs);
            row.Value = elem.InnerText;
            resultList.Add(row);
        }
    }
    /// <summary>
    /// Counts the text nodes that are direct children of the element, after parsing
    /// its <c>OuterHtml</c> as XML.
    /// </summary>
    /// <param name="e">Element whose outer markup is parsed.</param>
    /// <returns>
    /// The number of direct child text nodes, or 0 when anything in the attempt throws.
    /// </returns>
    private static int CountXText(this HtmlElement e)
    {
        try
        {
            // NOTE(review): OuterHtml is HTML and XElement.Parse expects well-formed XML,
            // which is presumably why the parse does not succeed for every element.
            XElement parsed = XElement.Parse(e.OuterHtml);

            int directTextNodes = 0;
            foreach (XNode child in parsed.Nodes())
            {
                if (child.NodeType == XmlNodeType.Text)
                {
                    directTextNodes++;
                }
            }

            return directTextNodes;
        }
        catch
        {
            // Best effort: any failure is reported as "no text nodes".
            return 0;
        }
    }

0 个答案:

没有答案