Parallel.ForEach
继续运行,我的程序没有结束。我无法追踪第一次迭代后的位置。我的猜测是陷入僵局并继续进行上下文切换。
private void ReadInputFile()
{
var collection = new ConcurrentBag<PropertyRecord>();
var lines = System.IO.File.ReadLines(InputFileName);
int i = 0;
int RecordsCount = lines.Count();
Parallel.ForEach(lines, line =>
{
if (string.IsNullOrWhiteSpace(line))
{
return;
}
var tokens = line.Split(',');
var postalCode = tokens[0];
var country = tokens.Length > 1 ? tokens[1] : "england";
SetLabelNotifyTwoText(
string.Format(
"Reading PostCode {0} out of {1}"
i,
lines.Length));
var tempRecord = GetAllAddesses(postalCode, country);
if (tempRecord != null)
{
foreach (PropertyRecord r in tempRecord)
{
collection.Add(r);
}
}
});
}
private List<PropertyRecord> GetAllAddesses(
string postalCode,
string country = "england")
{
SetLabelNotifyText("");
progressBar1.Value = 0;
progressBar1.Update();
var records = new List<PropertyRecord>();
using (WebClient w = new WebClient())
{
var url = CreateUrl(postalCode, country);
var document = w.DownloadString(url);
var pagesCount = GetPagesCount(document);
if (pagesCount == null)
{
return null;
}
for (int i = 0; i < pagesCount; i++)
{
SetLabelNotifyText(
string.Format(
"Reading Page {0} out of {1}",
i,
pagesCount - 1));
url = CreateUrl(postalcode,country, i);
document = w.DownloadString(url);
var collection = Regex.Matches(
document,
"<div class=\"soldDetails\">(.|\\n|\\r)*?class=" +
"\"soldAddress\".*?>(?<address>.*?)(</a>|</div>)" +
"(.|\\n|\\r)*?class=\\\"noBed\\\">(?<noBed>.*?)" +
"</td>|</tbody>");
foreach (var match in collection)
{
var r = new PropertyRecord();
var bedroomCount = match.Groups["noBed"].Value;
if(!string.IsNullOrEmpty(bedroomCount))
{
r.BedroomCount = bedroomCount;
}
else
{
r.BedroomCount = "-1";
}
r.address = match.Groups["address"].Value;
var line = string.Format(
"\"{0}\",{1}",
r.address
r.BedroomCount);
OutputLines.Add(line);
Records.Add(r);
}
}
}
return Records;
}
在没有Parallel.ForEach
的情况下运行良好,但在要求中使用Parallel.ForEach
。
我已经调试了它并且在从第GetAllAdresses
方法返回后 - 第一次 Step Next 按钮暂停,它只是在后台继续调试。它没有回到我放置的任何书签上。
答案 0 :(得分:5)
正如您在评论中所说,您的SetLabelNotifyText
和SetLabelNotifyTwoText
方法会调用Control.Invoke
。
要使Control.Invoke
正常工作,主线程必须是免费的,但在你的情况下,你似乎通过在其中调用Parallel.ForEach
来阻止主线程。
这是一个极小的复制品:
private void button1_Click(object sender, EventArgs e)
{
Parallel.ForEach(Enumerable.Range(1, 100), (i) =>
{
Thread.Sleep(10);//Simulate some work
this.Invoke(new Action(() => SetText(i)));
});
}
private void SetText(int i)
{
textBox1.Text = i.ToString();
}
主线程等待Parallel.ForEach
,工作线程等待主线程,从而导致死锁。
如何修复:不要使用Invoke
只使用BeginInvoke
或不要阻止MainThread。
如果这不是sscce的案例,那对我们有帮助
答案 1 :(得分:1)
更改您的代码,使用async
and await
。这是使用BeginInvoke
和其他异步代码模型的现代替代方法。
private async Task ReadInputFile()
{
var collection = new ConcurrentBag<PropertyRecord>();
var lines = System.IO.File.ReadLines(InputFileName);
int i = 0;
int RecordsCount = lines.Count();
Parallel.ForEach(lines, line =>
{
if (string.IsNullOrWhiteSpace(line))
{
return;
}
var tokens = line.Split(',');
var postalCode = tokens[0];
var country = tokens.Length > 1 ? tokens[1] : "england";
SetLabelNotifyTwoText(
string.Format(
"Reading PostCode {0} out of {1}"
i,
lines.Length));
var tempRecord = await GetAllAddesses(postalCode, country);
if (tempRecord != null)
{
foreach (PropertyRecord r in tempRecord)
{
collection.Add(r);
}
}
});
}
private async Task<List<PropertyRecord>> GetAllAddesses(
string postalCode,
string country = "england")
{
SetLabelNotifyText("");
progressBar1.Value = 0;
progressBar1.Update();
var records = new List<PropertyRecord>();
using (WebClient w = new WebClient())
{
var url = CreateUrl(postalCode, country);
var document = await w.DownloadStringTaskAsync(url);
var pagesCount = GetPagesCount(document);
if (pagesCount == null)
{
return null;
}
for (int i = 0; i < pagesCount; i++)
{
SetLabelNotifyText(
string.Format(
"Reading Page {0} out of {1}",
i,
pagesCount - 1));
url = CreateUrl(postalcode,country, i);
document = await w.DownloadStringTaskAsync(url);
var collection = Regex.Matches(
document,
"<div class=\"soldDetails\">(.|\\n|\\r)*?class=" +
"\"soldAddress\".*?>(?<address>.*?)(</a>|</div>)" +
"(.|\\n|\\r)*?class=\\\"noBed\\\">(?<noBed>.*?)" +
"</td>|</tbody>");
foreach (var match in collection)
{
var r = new PropertyRecord();
var bedroomCount = match.Groups["noBed"].Value;
if(!string.IsNullOrEmpty(bedroomCount))
{
r.BedroomCount = bedroomCount;
}
else
{
r.BedroomCount = "-1";
}
r.address = match.Groups["address"].Value;
var line = string.Format(
"\"{0}\",{1}",
r.address
r.BedroomCount);
OutputLines.Add(line);
Records.Add(r);
}
}
}
return Records;
}
然后像这样称呼它
ReadInputFile.Wait();
或者更好的是,来电者是async
,
await ReadInputFile();