在 C# 中，我有两个函数用于遍历 HtmlDocument 中的 HtmlElement。我遇到的问题是：当遇到同时包含文本和子元素的元素时，该元素自身的文本部分会被忽略。例如，遇到如下节点时：<div>hello1<div>hello2</div>hello3</div>，
我的函数只会提取 hello2，而 hello1 和 hello3 被忽略。我希望在不将 HtmlElement 转换为 XElement 的前提下解决这个问题。有没有办法获取 hello1 和 hello3？
/// <summary>
/// Builds an XML element named <paramref name="rootname"/> that contains one
/// <paramref name="rowname"/> child for every qualifying leaf element found
/// beneath the document's first &lt;html&gt; element.
/// </summary>
/// <param name="htmlDoc">The HTML document to walk.</param>
/// <param name="rootname">Name of the returned container element.</param>
/// <param name="rowname">Name given to each generated child element.</param>
/// <returns>
/// The container element; it has no children when the document has no
/// &lt;html&gt; element or when no leaf element qualifies.
/// </returns>
/// <remarks>
/// The traversal and the rules deciding which elements qualify are implemented
/// in FlattenChildrenIter; this method only locates the root and wraps the rows.
/// </remarks>
private static XElement FlattenChildren(this HtmlDocument htmlDoc, string rootname, string rowname)
{
    XElement result = new XElement(rootname);

    HtmlElementCollection htmlRoots = htmlDoc.GetElementsByTagName("html");
    if (htmlRoots.Count == 0)
    {
        // Nothing to walk: return the empty container instead of failing on the [0] lookup.
        return result;
    }

    // Walking the root's children is exactly what the recursive helper does for
    // any parent element, so start the recursion at the root itself.
    List<XElement> resultList = new List<XElement>();
    FlattenChildrenIter(htmlRoots[0], rowname, ref resultList);

    // XElement.Add enumerates a sequence argument and appends each item in order.
    result.Add(resultList);
    return result;
}
/// <summary>
/// Recursively walks the child elements of <paramref name="p"/> and appends one
/// <paramref name="rowname"/> element to <paramref name="resultList"/> for each
/// qualifying leaf element.
/// </summary>
/// <param name="p">Parent element whose children are visited.</param>
/// <param name="rowname">Name given to each generated XML element.</param>
/// <param name="resultList">List that receives the generated elements, in document order.</param>
/// <remarks>
/// A leaf (an element with no child elements) qualifies when IgnoreTag() is false,
/// HasQualifiedValue() is true, and both its "gr" and "gs" attributes are non-empty.
/// The generated element copies those two attributes and uses the leaf's InnerText
/// as its value.
/// NOTE: elements that have child elements are only descended into. Text that sits
/// directly inside such an element next to its children (mixed content) is not emitted.
/// </remarks>
private static void FlattenChildrenIter(HtmlElement p, string rowname, ref List<XElement> resultList)
{
    foreach (HtmlElement elem in p.Children)
    {
        if (elem.Children.Count > 0)
        {
            // Not a leaf: only its descendants can produce rows.
            FlattenChildrenIter(elem, rowname, ref resultList);
            continue;
        }

        if (elem.IgnoreTag() || !elem.HasQualifiedValue())
        {
            continue;
        }

        string gr = elem.GetAttribute("gr");
        string gs = elem.GetAttribute("gs");
        if (string.IsNullOrEmpty(gr) || string.IsNullOrEmpty(gs))
        {
            // Both attributes are required for a row.
            continue;
        }

        XElement newelem = new XElement(rowname);
        newelem.SetAttributeValue("gr", gr);
        newelem.SetAttributeValue("gs", gs);
        newelem.Value = elem.InnerText;
        resultList.Add(newelem);
    }
}
/// <summary>
/// Counts the text nodes that are direct children of the element, by parsing
/// the element's OuterHtml as XML.
/// </summary>
/// <param name="e">The element to inspect.</param>
/// <returns>
/// The number of direct child nodes whose node type is Text, or 0 when the
/// element is null, its markup is empty, or the markup cannot be parsed as XML.
/// </returns>
private static int CountXText(this HtmlElement e)
{
    if (e == null)
    {
        return 0;
    }

    string markup = e.OuterHtml;
    if (string.IsNullOrEmpty(markup))
    {
        return 0;
    }

    try
    {
        XElement parsed = XElement.Parse(markup);
        return parsed.Nodes().Count(node => node.NodeType == XmlNodeType.Text);
    }
    catch (XmlException)
    {
        // HTML is often not well-formed XML (unclosed tags, unquoted attributes,
        // named entities), which is why this parse does not succeed for every
        // element. Such markup is reported as having no text nodes.
        return 0;
    }
}