我有一个函数,基本上是通过并行迭代来并行化以下过程:
问题是第一次运行大约需要5分钟,第二次需要40分钟。输入集合不会更改,因此运行时间应非常相似。有什么想法吗?
我已经附上了GetPrices(int)的代码。第二次运行时,netstat显示它从0个连接开始(因此并不是第一次运行仍占用着可用连接),但连接数只增加到5个(而第一次运行时有30个)。
另外,在您看来,对返回的集合加锁是否有必要?
/// <summary>
/// Fetches the price table for every ID in <paramref name="IDs"/> in parallel
/// and returns the results keyed by ID.
/// </summary>
/// <param name="IDs">IDs to look up. Each ID is passed to <c>GetPrices(int)</c>.</param>
/// <returns>
/// A dictionary mapping each ID to a copy of the dictionary returned by
/// <c>GetPrices(int)</c> for that ID.
/// </returns>
/// <exception cref="AggregateException">
/// Thrown by <c>Parallel.ForEach</c> when any worker throws (for example an
/// <c>ArgumentException</c> from <c>Add</c> when <paramref name="IDs"/> contains a duplicate).
/// </exception>
public Dictionary<int, Dictionary<int, double>> GetPrices(List<int> IDs)
{
    Dictionary<int, Dictionary<int, double>> ret = new Dictionary<int, Dictionary<int, double>>();

    ParallelOptions pOptions = new ParallelOptions();
    // NOTE(review): 35 workers against a connection limit of 30 means some
    // workers may wait for a free connection - confirm this is intended.
    pOptions.MaxDegreeOfParallelism = 35;

    // DefaultConnectionLimit is a static, process-wide setting; remember the
    // previous value so it can be restored even if the loop throws.
    // NOTE(review): per the .NET documentation this value only applies to
    // ServicePoint objects created after the change - verify it takes effect
    // for the target host on repeated runs.
    int aux_bkp = ServicePointManager.DefaultConnectionLimit;
    ServicePointManager.DefaultConnectionLimit = 30;
    try
    {
        Parallel.ForEach(IDs, pOptions, ID =>
        {
            // Download and copy outside the lock so the critical section is
            // only the single insertion into the shared result.
            Dictionary<int, double> prices = new Dictionary<int, double>(GetPrices(ID));

            // Dictionary<TKey, TValue> is not safe for concurrent writers,
            // so the insertion into the shared result must be serialized.
            lock (ret)
            {
                ret.Add(ID, prices);
            }
        });
    }
    finally
    {
        ServicePointManager.DefaultConnectionLimit = aux_bkp;
    }

    return ret;
}
/// <summary>
/// Downloads the page at <c>url</c> and parses it with <c>ProcessHTML</c>,
/// retrying after a random delay of up to 5 seconds on any web error other
/// than HTTP 404.
/// </summary>
/// <param name="ID">Identifier passed to <c>ProcessHTML</c> and written to the debug log.</param>
/// <returns>
/// The dictionary returned by <c>ProcessHTML</c>, or an empty dictionary when
/// the server answers with HTTP 404 (Not Found).
/// </returns>
/// <remarks>
/// There is no retry cap: the loop keeps retrying for as long as the request
/// fails with an error other than 404.
/// </remarks>
public static Dictionary<int, double> GetPrices(int ID)
{
    Dictionary<int, double> ret = new Dictionary<int, double>();
    bool success = false;

    // Dispose the client so its resources are released deterministically
    // instead of waiting for garbage collection.
    using (WebClient webclient = new WebClient())
    {
        while (!success)
        {
            try
            {
                // NOTE(review): 'url' is not declared in this method; presumably
                // it is defined elsewhere in the class - confirm.
                Debug.WriteLine(string.Format("Get HTML: ({1}) - ({0})", url, ID));
                string resp = webclient.DownloadString(url);
                success = true;
                ret = ProcessHTML(resp, ID);
            }
            catch (WebException e)
            {
                HttpWebResponse aux = e.Response as HttpWebResponse;
                // Read the status before closing the response.
                bool notFound = aux != null && aux.StatusCode == HttpStatusCode.NotFound;

                // Close the error response before sleeping so the underlying
                // connection is released rather than held until finalization.
                if (e.Response != null)
                {
                    e.Response.Close();
                }

                if (notFound)
                {
                    // A 404 is treated as a final answer: stop and return what we have.
                    success = true;
                }
                else
                {
                    success = false;
                    // Seed explicitly: a parameterless Random is clock-seeded on
                    // .NET Framework, so parallel callers failing at the same
                    // moment could otherwise draw identical delays.
                    int delay = new System.Random(System.Guid.NewGuid().GetHashCode()).Next(5000);
                    System.Threading.Thread.Sleep(delay);
                    System.Diagnostics.Debug.WriteLine("HTTP Error - " + e.ToString());
                }
            }
        }
    }

    return ret;
}