C#异步Web浏览器运行速度非常慢

时间:2015-02-12 01:15:34

标签: c# asynchronous webbrowser-control async-await

该程序读取网站列表然后保存它们。我发现它对前2个url请求运行良好,然后就变得很慢(每次请求大约5分钟)。

第1行和第2行各只用了约2秒,之后的每一行都分别需要约5分钟。

当我调试时,我发现它在 wb.Navigate(url.ToString()); 这一行花了很长时间。
    /// <summary>
    /// Processes every URL in <c>rows</c> (declared outside this method) sequentially:
    /// each one is loaded through <c>load_WebStr</c>, then a progress line is written
    /// to the console.
    /// </summary>
    /// <returns>A task that yields <c>true</c> once every URL has been processed.</returns>
    /// <remarks>
    /// A failure in <c>load_WebStr</c> now surfaces as the original exception when this
    /// task is awaited, instead of being wrapped in an <see cref="AggregateException"/>
    /// by a blocking <c>.Result</c> call.
    /// </remarks>
    public static async Task<bool> test()
    {
        long totalCnt = rows.Count();
        long procCnt = 0;
        foreach (string url in rows)
        {
            procCnt++;

            // Await instead of blocking on .Result: blocking holds the calling thread
            // for the whole page load and can deadlock when the caller runs under a
            // synchronization context that load_WebStr's continuation needs.
            // The returned page text is not used here, so it is not stored.
            await load_WebStr(url);
            Console.WriteLine(DateTime.Now+ "["+procCnt + "/" + totalCnt+"]  "+url);
        }

        return true;
    }


/// <summary>
/// Runs <c>webBrowser_Async</c> for <paramref name="url"/> on a newly created STA
/// thread that pumps its own message loop, and hands the outcome back to the caller.
/// </summary>
/// <param name="url">Address passed through to <c>webBrowser_Async</c>.</param>
/// <returns>
/// The string produced by <c>webBrowser_Async</c>. If that call throws, the exception
/// is set on the returned task.
/// </returns>
public static async Task<string> load_WebStr(string url)
{
    var completion = new TaskCompletionSource<string>();

    ThreadStart pumpBody = () =>
    {
        EventHandler onFirstIdle = null;
        onFirstIdle = async (sender, args) =>
        {
            // One-shot handler: detach immediately so it runs for the first Idle only.
            Application.Idle -= onFirstIdle;

            // Give control back to the message loop; the rest resumes asynchronously.
            await Task.Yield();

            // Forward either the result or the exception to the awaiting caller.
            try
            {
                completion.SetResult(await webBrowser_Async(url));
            }
            catch (Exception error)
            {
                completion.SetException(error);
            }

            // Signals the message loop to stop, so Application.Run below returns.
            Application.ExitThread();
        };

        // Hooking Idle (rather than calling the work directly) ensures the work starts
        // from inside the message loop, after the SynchronizationContext is installed.
        Application.Idle += onFirstIdle;
        Application.Run();
    };

    // The worker thread uses the STA apartment model.
    var worker = new Thread(pumpBody);
    worker.SetApartmentState(ApartmentState.STA);
    worker.Start();

    try
    {
        return await completion.Task;
    }
    finally
    {
        // Wait for the worker thread to finish before returning.
        worker.Join();
    }
}


/// <summary>
/// Creates a temporary <see cref="WebBrowser"/> control, navigates it to
/// <paramref name="url"/>, waits for the first <c>DocumentCompleted</c> event and
/// returns the control's <c>DocumentText</c>.
/// </summary>
/// <param name="url">Address to navigate to.</param>
/// <returns>
/// The control's <c>DocumentText</c> after the wait. A failure during navigation or
/// while waiting is logged to the console and does not propagate; the text is still
/// read afterwards (best-effort, as before).
/// </returns>
/// <remarks>
/// NOTE(review): this awaits an event of a WinForms control, so it presumably must run
/// on a thread with a message loop (load_WebStr in this file sets one up) - confirm.
/// NOTE(review): DocumentCompleted may be raised more than once for pages with frames;
/// only the first occurrence is awaited here - verify this is sufficient for the
/// target sites.
/// </remarks>
public static async Task<string> webBrowser_Async(string url)
{
    string result = "";
    using (var wb = new WebBrowser())
    {
        wb.ScriptErrorsSuppressed = true;

        // Completed by the first DocumentCompleted event; TrySetResult makes any
        // further events harmless.
        var tcs = new TaskCompletionSource<bool>();
        WebBrowserDocumentCompletedEventHandler documentCompletedHandler =
            (s, e) => tcs.TrySetResult(true);

        wb.DocumentCompleted += documentCompletedHandler;
        try
        {
            wb.Navigate(url);
            // await for DocumentCompleted
            await tcs.Task;
        }
        catch (Exception ex)
        {
            // Stay best-effort, but report the failure itself: a bare "BUG!" hides
            // the cause and makes slow or failed navigations impossible to diagnose.
            Console.WriteLine("BUG! " + ex);
        }
        finally
        {
            // Always detach so the handler does not outlive this call.
            wb.DocumentCompleted -= documentCompletedHandler;
        }

        // Read the text before the control is disposed at the end of the using block.
        result = wb.DocumentText;
    }

    return result;
}

1 个答案:

答案 0 :(得分:0)

我认出这是我在回答不少WebBrowser相关问题时用过的代码的略微修改版本。是this one吗?附上原始来源的链接总是一个好主意。

无论如何,您在此处使用它的主要问题可能是您为列表中的每个网址创建并销毁WebBrowser控件的实例。

相反,您应该重新使用WebBrowser的单个实例(或一个WebBrowser对象池)。您可以在here找到这两个版本。