我有简单的代码用HTML
解析HtmlAgilityPack
并将结果上传到SQL data server
。问题是,当我使用foreach
循环时,它正常工作。我想尝试Parell for each
循环。我看到,抓取速度更快,但我得到(不是每次)堆栈溢出异常。你能看一下代码并告诉我原因吗?
//for each link object
Parallel.ForEach(link, _link =>
{
pageNumber=0;
position=1;
//try to load every single page number
while (true)
{
//load page's html
siteHtml = web.Load(_link.LinkUrl + "?page=" + pageNumber);
try
{
doc.LoadHtml(siteHtml.DocumentNode.SelectNodes("//section[@class='answers']")[0].InnerHtml);
comments = doc.DocumentNode.SelectNodes("//singleAnswer");
foreach (HtmlNode _comments in comments)
{
HtmlAgilityPack.HtmlDocument HtmlPage2 = new HtmlAgilityPack.HtmlDocument();
HtmlPage2.LoadHtml(_comments.InnerHtml)
commentId = Convert.ToInt32(_comments.GetAttributeValue("id", ""));
commentValue = (HtmlPage2.DocumentNode.SelectNodes("//p[@class='content']")[0].InnerText);
if (commentValue.Contains(_link.Keyword))
{
sql.updateComment(_link.LinkId, commentValue);
_link.Position = position;
_link.MyCommentId = commentId;
goto NextLink;
}
position++;
}
}
catch
{
}
if (!siteHtml.DocumentNode.InnerHtml.Contains(@"class=""pagingx"" rel=""nextPage"">"))
{
break;
}
pageNumber++;
}
NextLink:;
groupBox1.Invoke(new Action(delegate ()
{
groupBox1.Text = "Link's statistics (" + finished + "/" + linksUrlElements + ")";
}));
});
我还看到Parallel for each
而不是for each