Question

我有一个函数，它基本上通过并行迭代（Parallel.ForEach）来并行执行以下过程：

获取网页
解析HTML代码
返回结果并添加到结果集合

问题是第一次运行大约需要5分钟，第二次需要40分钟。输入集合不会更改，因此运行时间应非常相似。有什么想法吗？

我已经附上了 GetPrices(int) 的代码。第二次运行时，netstat 显示一开始有 0 个连接（因此并不是第一次运行仍占用着可用连接），但连接数只增加到 5 个（而第一次运行时是 30 个）。

另外，在您看来，对返回的结果集合（ret）的访问是否需要加锁？

    /// <summary>
    /// Fetches the prices for every ID in <paramref name="IDs"/> in parallel and
    /// collects them into one dictionary keyed by ID.
    /// </summary>
    /// <param name="IDs">IDs to look up; each one is passed to <c>GetPrices(int)</c>.</param>
    /// <returns>
    /// A dictionary mapping each ID to a copy of the per-ID result returned by
    /// <c>GetPrices(int)</c>.
    /// </returns>
    /// <remarks>
    /// A duplicate ID in <paramref name="IDs"/> makes <c>Dictionary.Add</c> throw;
    /// <c>Parallel.ForEach</c> surfaces that as an <c>AggregateException</c>.
    /// </remarks>
    public Dictionary<int, Dictionary<int, double>> GetPrices(List<int> IDs)
    {
        Dictionary<int, Dictionary<int, double>> ret = new Dictionary<int, Dictionary<int, double>>();
        int aux_bkp = ServicePointManager.DefaultConnectionLimit;
        ParallelOptions pOptions = new ParallelOptions();

        pOptions.MaxDegreeOfParallelism = 35;

        // NOTE(review): per the ServicePointManager documentation, changing this limit only
        // affects ServicePoint objects created afterwards; a ServicePoint that already exists
        // for the host keeps the limit it was created with. Confirm whether that explains
        // differing connection counts between consecutive runs.
        ServicePointManager.DefaultConnectionLimit = 30;
        try
        {
            Parallel.ForEach(IDs, pOptions, ID =>
            {
                Dictionary<int, double> aux = GetPrices(ID);

                // Build the per-ID copy outside the lock; it only touches data local to this iteration.
                Dictionary<int, double> prices = new Dictionary<int, double>(aux.Count);
                foreach (KeyValuePair<int, double> entry in aux)
                {
                    prices.Add(entry.Key, entry.Value);
                }

                // Dictionary<TKey, TValue> is not safe for concurrent writers, so the shared
                // result must be mutated under a lock.
                lock (ret)
                {
                    ret.Add(ID, prices);
                }
            });
        }
        finally
        {
            // Restore the process-wide setting even when an iteration throws.
            ServicePointManager.DefaultConnectionLimit = aux_bkp;
        }

        return ret;
    }
    /// <summary>
    /// Downloads the page at <c>url</c> and parses it with <c>ProcessHTML</c>.
    /// </summary>
    /// <param name="ID">ID passed to <c>ProcessHTML</c> and written to the debug log.</param>
    /// <returns>
    /// The value returned by <c>ProcessHTML</c>, or an empty dictionary when the
    /// download fails with HTTP 404.
    /// </returns>
    /// <remarks>
    /// Any <c>WebException</c> other than an HTTP 404 is retried after a random delay
    /// of up to 5 seconds. There is no retry cap, so a persistently failing request
    /// never returns.
    /// </remarks>
    public static Dictionary<int, double> GetPrices(int ID)
    {
        Dictionary<int, double> ret = new Dictionary<int, double>();
        bool success = false;

        // WebClient is IDisposable; dispose it once all attempts are finished.
        using (WebClient webclient = new WebClient())
        {
            while (!success)
            {
                try
                {
                    Debug.WriteLine(string.Format("Get HTML: ({1}) - ({0})", url, ID));

                    string resp = webclient.DownloadString(url);
                    success = true;

                    ret = ProcessHTML(resp, ID);
                }
                catch (WebException e)
                {
                    System.Net.HttpWebResponse aux = e.Response as System.Net.HttpWebResponse;
                    bool notFound = aux != null && aux.StatusCode == HttpStatusCode.NotFound;

                    // The response attached to a WebException holds its connection until it is
                    // closed; release it (after reading the status) so the connection can be reused.
                    if (e.Response != null)
                    {
                        e.Response.Close();
                    }

                    if (notFound)
                    {
                        // A 404 is a definitive answer: stop retrying and return what we have.
                        success = true;
                    }
                    else
                    {
                        success = false;
                        System.Threading.Thread.Sleep(new System.Random().Next(5000));
                        System.Diagnostics.Debug.WriteLine("HTTP Error - " + e.ToString());
                    }
                }
            }
        }

        return ret;
    }

Parallel.ForEach - 连续调用时耗时变长

0 个答案: