此页面上的脚本导致您的网络浏览器运行缓慢
我试图在循环中获取多个网页的内容,但在处理了大约 38 个网址之后程序就变得无响应了,代码如下。这项活动的目的就是抓取这些页面的内容。我已经在网上查找过多种方案,但都没有帮助。
下面这段代码获取作为参数传入的 URL 的页面内容:
/// <summary>
/// Gets the page content of the URL passed as parameter: navigates a fresh
/// <c>WebBrowser</c> to <paramref name="url"/>, waits via <c>waitTillLoad</c>, and loads
/// the outer HTML of the rendered document element into an HtmlAgilityPack document.
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <returns>
/// The parsed document. If reading the DOM fails, a message is written to the console
/// and the document is returned as constructed, with nothing loaded into it.
/// </returns>
public HtmlAgilityPack.HtmlDocument GetPageSource(string url)
{
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    IHTMLDocument3 documentAsIHtmlDocument = null;

    webBrowserCtrl = new WebBrowser();
    try
    {
        webBrowserCtrl.ScriptErrorsSuppressed = true;
        webBrowserCtrl.Navigate(url);
        waitTillLoad(webBrowserCtrl);

        try
        {
            documentAsIHtmlDocument = (mshtml.IHTMLDocument3)webBrowserCtrl.Document.DomDocument;

            // The reader is scoped to the block that creates it. Previously it was disposed
            // unconditionally in 'finally', which threw NullReferenceException (hiding the
            // real error) whenever the DOM access above failed before it was assigned.
            using (StringReader content = new StringReader(documentAsIHtmlDocument.documentElement.outerHTML))
            {
                doc.Load(content);
            }
        }
        catch
        {
            // Best effort: a page whose DOM cannot be read yields an empty document.
            Console.Write("Exception from web Page: {0}", url);
        }
    }
    finally
    {
        // Runs even when Navigate/waitTillLoad throw, so the browser control is never leaked.
        if (documentAsIHtmlDocument != null)
        {
            Marshal.ReleaseComObject(documentAsIHtmlDocument);
            documentAsIHtmlDocument = null;
        }

        webBrowserCtrl.Dispose();

        // Memory trimming kept from the original code.
        // NOTE(review): EmptyWorkingSet is declared elsewhere; presumably a P/Invoke that
        // trims the process working set - confirm.
        System.GC.Collect();
        System.GC.WaitForPendingFinalizers();
        EmptyWorkingSet(Process.GetCurrentProcess().Handle);
    }

    return doc;
}
/// <summary>
/// Pumps the Windows Forms message queue until <paramref name="webBrControl"/> reports
/// <c>ReadyState == Complete</c> and is no longer busy, or until a time limit expires.
/// </summary>
/// <param name="webBrControl">The browser control whose navigation is being awaited.</param>
private void waitTillLoad(WebBrowser webBrControl)
{
    // Overall cap for the open-ended waits below. The original loops had no limit, so a
    // page that never became idle/Complete kept this method spinning forever and froze
    // the UI thread. 30 s is a chosen default, not a measured value - tune as needed.
    const int maxWaitMilliseconds = 30000;
    System.Diagnostics.Stopwatch elapsed = System.Diagnostics.Stopwatch.StartNew();

    // Phase 1: let the control finish whatever it is currently busy with.
    while (webBrControl.IsBusy && elapsed.ElapsedMilliseconds < maxWaitMilliseconds)
    {
        Application.DoEvents();
    }

    // Phase 2: poll up to 500 times, 10 ms apart, for the Complete state.
    for (int i = 0; i < 500; i++)
    {
        if (webBrControl.ReadyState == System.Windows.Forms.WebBrowserReadyState.Complete)
        {
            break;
        }

        Application.DoEvents();
        Thread.Sleep(10);
    }

    // Phase 3: keep pumping until the control is seen in a non-Complete state again,
    // or the iteration budget is used up.
    WebBrowserReadyState loadStatus;
    int waittime = 100000;
    int counter = 0;
    while (true)
    {
        loadStatus = webBrControl.ReadyState;
        Application.DoEvents();
        if ((counter > waittime) || (loadStatus == WebBrowserReadyState.Uninitialized) || (loadStatus == WebBrowserReadyState.Loading) || (loadStatus == WebBrowserReadyState.Interactive))
        {
            break;
        }
        counter++;
    }

    // Phase 4: wait for Complete + idle, now bounded by the overall cap and yielding
    // between polls instead of spinning at full CPU.
    while (elapsed.ElapsedMilliseconds < maxWaitMilliseconds)
    {
        loadStatus = webBrControl.ReadyState;
        Application.DoEvents();
        if (loadStatus == WebBrowserReadyState.Complete && !webBrControl.IsBusy)
        {
            break;
        }

        Thread.Sleep(10);
    }
}
答案 0 :(得分:0)
问题出在您的 waitTillLoad 函数上。在离开调用 Web 浏览器 Navigate 的那个方法之前,您不应该对 HTML 文档做任何处理。这是线程方面的问题:由于 Web 浏览器控件和您的 GUI 运行在同一个线程上,您需要使用事件来完成这项工作。
所以您应该先订阅 DocumentCompleted 事件,然后像下面这样在事件处理程序中调用您的 waitTillLoad:
// State shared between GetPageSource and webBrowserCtrl_DocumentCompleted. The handler
// cannot see GetPageSource's locals, so the target document, the URL (for the error
// message) and the completion flag live in fields for the duration of one call.
private HtmlAgilityPack.HtmlDocument pageSourceDoc;
private string pageSourceUrl;
private bool pageSourceReady;

/// <summary>
/// Navigates a fresh <c>WebBrowser</c> to <paramref name="url"/> and returns its rendered
/// HTML as an HtmlAgilityPack document. The DOM is read in the DocumentCompleted handler;
/// this method pumps messages until that handler has run or a time limit expires.
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <returns>
/// The parsed document; it has nothing loaded into it if the handler failed to read the
/// DOM or never ran before the time limit.
/// </returns>
public HtmlAgilityPack.HtmlDocument GetPageSource(string url)
{
    // Chosen cap so a navigation that never completes cannot block the caller forever.
    const int maxWaitMilliseconds = 30000;

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    pageSourceDoc = doc;
    pageSourceUrl = url;
    pageSourceReady = false;

    webBrowserCtrl = new WebBrowser();
    try
    {
        webBrowserCtrl.ScriptErrorsSuppressed = true;

        // Subscribe BEFORE navigating so a fast completion cannot be missed.
        webBrowserCtrl.DocumentCompleted += webBrowserCtrl_DocumentCompleted;
        webBrowserCtrl.Navigate(url);

        // The events are delivered on this thread, so it has to keep pumping messages
        // for the handler to be invoked at all.
        System.Diagnostics.Stopwatch elapsed = System.Diagnostics.Stopwatch.StartNew();
        while (!pageSourceReady && elapsed.ElapsedMilliseconds < maxWaitMilliseconds)
        {
            System.Windows.Forms.Application.DoEvents();
            System.Threading.Thread.Sleep(10);
        }
    }
    finally
    {
        // Removing a handler that is no longer subscribed is a no-op.
        webBrowserCtrl.DocumentCompleted -= webBrowserCtrl_DocumentCompleted;

        // Dispose here rather than inside the control's own event handler.
        webBrowserCtrl.Dispose();
        pageSourceDoc = null;
        pageSourceUrl = null;

        // Memory trimming kept from the original code.
        // NOTE(review): EmptyWorkingSet is declared elsewhere; presumably a P/Invoke that
        // trims the process working set - confirm.
        System.GC.Collect();
        System.GC.WaitForPendingFinalizers();
        EmptyWorkingSet(Process.GetCurrentProcess().Handle);
    }

    return doc;
}

/// <summary>
/// One-shot DocumentCompleted handler: waits for the control to settle, copies the
/// document element's outer HTML into the document prepared by GetPageSource, then
/// signals completion.
/// </summary>
private void webBrowserCtrl_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    // Unsubscribe first: waitTillLoad pumps messages, and further DocumentCompleted
    // events raised meanwhile must not re-enter this handler.
    webBrowserCtrl.DocumentCompleted -= webBrowserCtrl_DocumentCompleted;

    IHTMLDocument3 documentAsIHtmlDocument = null;
    try
    {
        waitTillLoad(webBrowserCtrl);

        try
        {
            documentAsIHtmlDocument = (mshtml.IHTMLDocument3)webBrowserCtrl.Document.DomDocument;
            using (StringReader content = new StringReader(documentAsIHtmlDocument.documentElement.outerHTML))
            {
                pageSourceDoc.Load(content);
            }
        }
        catch
        {
            // Best effort: a page whose DOM cannot be read leaves the document empty.
            Console.Write("Exception from web Page: {0}", pageSourceUrl);
        }
    }
    finally
    {
        if (documentAsIHtmlDocument != null)
        {
            Marshal.ReleaseComObject(documentAsIHtmlDocument);
            documentAsIHtmlDocument = null;
        }

        // Always release the waiting GetPageSource loop, even on failure.
        pageSourceReady = true;
    }
}
/// <summary>
/// Pumps the Windows Forms message queue until <paramref name="webBrControl"/> reports
/// <c>ReadyState == Complete</c> and is no longer busy, or until a time limit expires.
/// </summary>
/// <param name="webBrControl">The browser control whose navigation is being awaited.</param>
private void waitTillLoad(WebBrowser webBrControl)
{
    // Overall cap for the open-ended waits below. The original loops had no limit, so a
    // page that never became idle/Complete kept this method spinning forever and froze
    // the UI thread. 30 s is a chosen default, not a measured value - tune as needed.
    const int maxWaitMilliseconds = 30000;
    System.Diagnostics.Stopwatch elapsed = System.Diagnostics.Stopwatch.StartNew();

    // Phase 1: let the control finish whatever it is currently busy with.
    while (webBrControl.IsBusy && elapsed.ElapsedMilliseconds < maxWaitMilliseconds)
    {
        Application.DoEvents();
    }

    // Phase 2: poll up to 500 times, 10 ms apart, for the Complete state.
    for (int i = 0; i < 500; i++)
    {
        if (webBrControl.ReadyState == System.Windows.Forms.WebBrowserReadyState.Complete)
        {
            break;
        }

        Application.DoEvents();
        Thread.Sleep(10);
    }

    // Phase 3: keep pumping until the control is seen in a non-Complete state again,
    // or the iteration budget is used up.
    WebBrowserReadyState loadStatus;
    int waittime = 100000;
    int counter = 0;
    while (true)
    {
        loadStatus = webBrControl.ReadyState;
        Application.DoEvents();
        if ((counter > waittime) || (loadStatus == WebBrowserReadyState.Uninitialized) || (loadStatus == WebBrowserReadyState.Loading) || (loadStatus == WebBrowserReadyState.Interactive))
        {
            break;
        }
        counter++;
    }

    // Phase 4: wait for Complete + idle, now bounded by the overall cap and yielding
    // between polls instead of spinning at full CPU.
    while (elapsed.ElapsedMilliseconds < maxWaitMilliseconds)
    {
        loadStatus = webBrControl.ReadyState;
        Application.DoEvents();
        if (loadStatus == WebBrowserReadyState.Complete && !webBrControl.IsBusy)
        {
            break;
        }

        Thread.Sleep(10);
    }
}
}