我们有一个.aspx页面,以编程方式加载最多5个网址的网页内容,并检查每个网页内容是否有关键字。以下代码正在运行,但需要提高cpu使用率和时间性能,我不是这方面的专家
首先:使用异步网络表单等待网址
protected void Page_Load(object sender, EventArgs e)
{
//the page won't load till the registered task is completed
RegisterAsyncTask(new PageAsyncTask(ProcessURLsKeywords));
}
第二步:为每个url-keyword对创建任务并等待它们全部完成然后继续返回页面响应
private async Task ProcessURLsKeywords()
{
List<Task> availableTasks = new List<Task>();
failedURLs = new ConcurrentBag<string>(); //thread-safe collection of unordered items
//start checking each keyword-url pair
if (key1 != null && !string.IsNullOrWhiteSpace(url1))
{
availableTasks.Add(CheckPageContent(url1, key1));
}
//do the same for url2,key2 and url3,key3...etc
await Task.WhenAll(availableTasks.ToArray());
//return response
}
第三:功能获取内容
private async Task CheckPageContent(string url, string key)
{
try
{
string content;
var browser = new WebProcessor();
content = await browser.GetGeneratedHTML(url);
if (content != null)
{
if (!content.ToLower().Contains(key.ToLower()))
{
failedURLs.Add(url);
}
}
content = null;
}
catch (Exception ex)
{
}
}
WebProcessor类为每个URL创建STA线程并返回结果
public class WebProcessor
{
private string GeneratedSource { get; set; }
private string URL { get; set; }
private MyWebBrowser wb { get; set; }
private static Mutex mutex = new Mutex();
public async Task<string> GetGeneratedHTML(string url)
{
URL = url;
await Task.Run(() =>
{
Thread t = new Thread(new ThreadStart(WebBrowserThread));
t.SetApartmentState(ApartmentState.STA);
t.Start();
t.Join();
});
return GeneratedSource;
}
private void WebBrowserThread()
{
mutex.WaitOne();
wb = new MyWebBrowser();
try
{
wb.AllowNavigation = true;
wb.ScriptErrorsSuppressed = true;
wb.Navigate(URL);
wb.DocumentCompleted +=
new WebBrowserDocumentCompletedEventHandler(
wb_DocumentCompleted);
while (wb.ReadyState != WebBrowserReadyState.Complete)
Application.DoEvents();
//Added this line, because the final HTML takes a while to show up
GeneratedSource = wb.DocumentText;
//GeneratedSource = wb.Document.Body.InnerText;
}
catch (Exception ex)
{
}
finally
{
mutex.ReleaseMutex();
wb.Dispose();
}
}
private void wb_DocumentCompleted(object sender,
WebBrowserDocumentCompletedEventArgs e)
{
try
{
MyWebBrowser wb = (MyWebBrowser)sender;
if (wb.ReadyState != System.Windows.Forms.WebBrowserReadyState.Complete)
return;
if (wb.Document == null)
return;
GeneratedSource = wb.Document.Body.InnerText;
//load frame content within page - web browser doesn't do that automatically
foreach (var frame in wb.Document.Window.Frames)
{
HtmlWindow winframe = frame as HtmlWindow;
WebProcessor webFrame = new WebProcessor();
try
{
System.Threading.SynchronizationContext.Current.Post(async delegate
{
try
{
GeneratedSource += await webFrame.GetGeneratedHTML(winframe.Url.AbsoluteUri);
}
catch (Exception ex)
{
}
}, null);
}
catch (Exception ex)
{
}
}
}
catch (Exception ex)
{
}
}
/*********************************************************************************************/
}
我尝试集成Application.Run()和消息泵功能,但是Application.Run()上的代码块