<html>
<A NAME="doc_id_1"></A>
<div class="find1">
Iam here, extract me.
</div>
<div class = "find2">
iam here also, extract me as well.
</div>
<A NAME="doc_id_2"></A>
</html>
我想提取位于两个锚标记(doc_id_1 和 doc_id_2)之间的所有 div 的内容,目前使用下面的代码来提取数据:
// Asker's attempt: select the anchors by their name attribute, then read
// whatever node comes directly after each matching first anchor.
var nodes = doc.DocumentNode.SelectNodes("//a[@name = 'doc_id_1']");
// nodes1 is selected here but never used in the code below.
var nodes1 = doc.DocumentNode.SelectNodes("//a[@name = 'doc_id_2']");
// NOTE(review): SelectNodes presumably returns null when nothing matches,
// which would make this foreach throw - confirm against the HtmlAgilityPack docs.
foreach (HtmlNode node in nodes)
{
// Text content of the anchor element itself.
string yourText1 = node.InnerText;
//var yourText2 = node.NextSibling.SelectNodes("//div");
// NOTE(review): NextSibling is only the immediately following node; with the
// markup above that is likely the whitespace text node between </A> and <div>,
// not the div itself - confirm. Nodes further along are never visited.
string yourText2 = node.NextSibling.InnerHtml;
//foreach (HtmlNode var in yourText2)
//{
// string yourText3 = var.InnerHtml;
//}
}
我不想依赖这些 div 的类名,因为我正在编写通用代码。任何帮助将不胜感激。
答案 0 :(得分:0)
我假设您事先知道两个锚标记的 name 属性值。
// Collects the text of every div that sits between the anchor named
// 'doc_id_1' and the anchor named 'doc_id_2'.
var doc = new HtmlDocument();
// The document must be loaded before it can be queried; html is the string
// holding the markup to search.
doc.LoadHtml(html);
var firstAnchor = doc.DocumentNode.SelectSingleNode("//a[@name = 'doc_id_1']");
var div = firstAnchor.NextSibling;
// Stop at the second anchor, identified by its name ATTRIBUTE (HtmlNode.Name
// is the tag name, e.g. "a" or "div"), or when the siblings run out.
while (div != null && div.GetAttributeValue("name", "") != "doc_id_2")
{
    // Siblings also include the whitespace text nodes between the tags;
    // only the div elements are of interest.
    if (div.Name == "div")
    {
        var divText = div.InnerText; // do whatever with this
    }
    div = div.NextSibling;
}
答案 1 :(得分:0)
使用Linq的一个选项:
// Parse the markup and locate the two anchors that delimit the region of interest.
var doc = new HtmlDocument();
doc.LoadHtml(Resources.Html);

HtmlNode startNode = doc.DocumentNode.SelectSingleNode("//a[@name = 'doc_id_1']");
HtmlNode endNode = doc.DocumentNode.SelectSingleNode("//a[@name = 'doc_id_2']");

// Walk the children of the start anchor's parent: drop everything up to and
// including the start anchor, keep going until the end anchor, and retain
// only the div elements found in between. The query is lazily evaluated.
var nodesYouWant = startNode.ParentNode.ChildNodes
    .SkipWhile(sibling => sibling != startNode)
    .Skip(1)
    .TakeWhile(sibling => sibling != endNode)
    .Where(sibling => sibling.Name == "div");
或者:
// Locate the delimiting anchors, then keep only the div elements among the
// siblings that GetEnclosedNodes yields between them.
HtmlNode currentNode = doc.DocumentNode.SelectSingleNode("//a[@name = 'doc_id_1']");
HtmlNode endNode = doc.DocumentNode.SelectSingleNode("//a[@name = 'doc_id_2']");
var nodesYouWant =
    from node in GetEnclosedNodes(currentNode, endNode)
    where node.Name == "div"
    select node;
/// <summary>
/// Lazily yields the siblings that follow <paramref name="currentNode"/>,
/// stopping before <paramref name="endNode"/> or after the last sibling,
/// whichever comes first.
/// </summary>
/// <param name="currentNode">Node whose following siblings are enumerated; it is not yielded itself.</param>
/// <param name="endNode">Sibling at which enumeration stops; it is not yielded.</param>
/// <returns>The siblings strictly between the two nodes, in document order.</returns>
private static IEnumerable<HtmlNode> GetEnclosedNodes(HtmlNode currentNode, HtmlNode endNode)
{
    for (HtmlNode sibling = currentNode.NextSibling;
         sibling != null && sibling != endNode;
         sibling = sibling.NextSibling)
    {
        yield return sibling;
    }
}