我正在处理一个包含大量项目的列表(20万到30万个),每个项目的处理时间在2到8秒之间。为了节省时间,我可以并行处理这个列表。由于我处于异步上下文中,所以使用了类似下面的代码:
/// <summary>
/// Starts <c>Work</c> for every keyword at once and waits for all of them to finish.
/// </summary>
/// <param name="keyword">The keywords to process. An empty list yields an empty result.</param>
/// <returns>
/// The results in the same order as <paramref name="keyword"/>, or an empty list when at
/// least one <c>Work</c> call failed (every failure is logged before returning).
/// </returns>
/// <remarks>
/// This method does not limit concurrency: all work items are started immediately.
/// </remarks>
public async Task<List<Keyword>> DoWord(List<string> keyword)
{
    if (keyword.Count == 0)
    {
        return new List<Keyword>();
    }

    // The async lambda is kept on purpose: it turns a synchronous throw inside Work
    // into a faulted task instead of an exception thrown while building the task list.
    Task<Keyword[]> allWork = Task.WhenAll(
        keyword.Select(async kw => await Work(kw).ConfigureAwait(false)));

    try
    {
        // Task.WhenAll returns results in the order of the supplied tasks, so no
        // concurrent collection is needed to gather them.
        Keyword[] results = await allWork.ConfigureAwait(false);
        return results.ToList();
    }
    catch (Exception)
    {
        // 'await' rethrows only the first failure, unwrapped, so catching
        // AggregateException here would never match. The complete set of failures
        // lives on the WhenAll task itself.
        AggregateException failures = allWork.Exception;
        if (failures == null)
        {
            // Canceled rather than faulted: there is nothing to log, let it propagate.
            throw;
        }

        foreach (Exception innerEx in failures.InnerExceptions)
        {
            log.ErrorFormat("Core threads exception: {0}", innerEx);
        }

        return new List<Keyword>();
    }
}
关键字列表总是包含8个元素(由上层调用方传入),因此我是每8个一批地处理整个列表。但是在这种情况下,我猜如果其中7个关键字在7秒内处理完,而第8个关键字需要10秒,那么这8个关键字的总耗时就是10秒(如果我错了,请纠正我)。
我该如何实现类似Parallel.ForEach的处理方式呢?我的意思是:先启动8个关键字,只要其中一个完成,就再启动1个。这样我就能始终保持8个任务同时运行。有什么想法吗?
答案 0 :(得分:3)
另一种更简单的方法是使用AsyncEnumerator NuGet Package:
using System.Collections.Async;
/// <summary>
/// Processes the keywords with at most 8 <c>Work</c> calls in flight at a time.
/// </summary>
/// <param name="keywords">The keywords to process.</param>
/// <returns>
/// The results of the keywords that were processed successfully. Results are gathered in a
/// <see cref="ConcurrentBag{T}"/>, which does not preserve insertion order, so the order of
/// the returned list is unspecified. Keywords whose processing failed are logged and omitted.
/// </returns>
public async Task<List<Keyword>> DoWord(List<string> keywords)
{
    var keywordResults = new ConcurrentBag<Keyword>();

    await keywords.ParallelForEachAsync(async keyword =>
    {
        try
        {
            var result = await Work(keyword);
            keywordResults.Add(result);
        }
        catch (AggregateException ae)
        {
            // Only reached when Work itself throws an AggregateException:
            // 'await' does not wrap ordinary failures in one.
            foreach (Exception innerEx in ae.InnerExceptions)
            {
                log.ErrorFormat("Core threads exception: {0}", innerEx);
            }
        }
        catch (Exception ex)
        {
            // 'await' rethrows the original exception unwrapped, so this is the branch
            // that handles a normal failure of a single keyword. Logging it here keeps
            // one bad keyword from escaping the loop body.
            log.ErrorFormat("Core threads exception: {0}", ex);
        }
    }, maxDegreeOfParallelism: 8);

    return keywordResults.ToList();
}
答案 1 :(得分:2)
以下示例代码展示了如何使用TPL Dataflow来解决此问题。
请注意,要编译这段代码,您需要通过NuGet将TPL Dataflow添加到您的项目中。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using System.Threading.Tasks.Dataflow;
namespace Demo
{
/// <summary>Dummy test class standing in for the real result type.</summary>
class Keyword
{
    /// <summary>The name carried by this result.</summary>
    // Exposed as an auto-property instead of a public field; object-initializer and
    // read syntax at the call sites are unchanged.
    public string Name { get; set; }
}
/// <summary>
/// Demo program: processes a list of keywords with a bounded number of concurrent
/// work items using TPL Dataflow blocks.
/// </summary>
class Program
{
    // Source of the simulated processing delay. Random is not thread-safe, so every
    // access is made under a lock on this instance.
    static readonly Random rng = new Random();

    /// <summary>Builds dummy input, runs <see cref="DoWork"/> and prints the results.</summary>
    static void Main()
    {
        // Dummy test data.
        var keywords = Enumerable.Range(1, 100).Select(n => n.ToString()).ToList();

        // GetAwaiter().GetResult() rethrows the original exception, whereas .Result
        // would wrap it in an AggregateException.
        var result = DoWork(keywords).GetAwaiter().GetResult();

        Console.WriteLine("---------------------------------");
        foreach (var item in result)
        {
            Console.WriteLine(item.Name);
        }
    }

    /// <summary>
    /// Runs <see cref="Work"/> for each keyword, keeping at most
    /// <paramref name="maxDegreeOfParallelism"/> calls in flight at any time.
    /// </summary>
    /// <param name="keywords">The keywords to process.</param>
    /// <param name="maxDegreeOfParallelism">
    /// Maximum number of concurrent <see cref="Work"/> invocations; defaults to 8.
    /// The value is passed straight to <c>ExecutionDataflowBlockOptions</c>, which validates it.
    /// </param>
    /// <returns>The results collected from all keywords.</returns>
    public static async Task<List<Keyword>> DoWork(List<string> keywords, int maxDegreeOfParallelism = 8)
    {
        var input = new TransformBlock<string, Keyword>
        (
            s => Work(s),
            // This limits how many Work invocations run concurrently. It is not a thread
            // count: while a call is awaiting, it does not occupy a thread.
            new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism }
        );

        var result = new List<Keyword>(keywords.Count);

        var output = new ActionBlock<Keyword>
        (
            // Output only 1 item at a time, because 'result.Add()' is not threadsafe.
            item => result.Add(item),
            new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = 1 }
        );

        // Completion (or a fault) of the input block is forwarded to the output block,
        // so awaiting output.Completion below covers the whole pipeline.
        input.LinkTo(output, new DataflowLinkOptions { PropagateCompletion = true });

        foreach (string s in keywords)
        {
            await input.SendAsync(s);
        }

        input.Complete();
        await output.Completion;

        return result;
    }

    /// <summary>
    /// Stubbed test method: waits a random 10-999 ms to simulate load, then returns a
    /// <see cref="Keyword"/> named after the input.
    /// </summary>
    /// <param name="s">The keyword to "process".</param>
    /// <returns>A new <see cref="Keyword"/> whose <c>Name</c> is <paramref name="s"/>.</returns>
    public static async Task<Keyword> Work(string s)
    {
        Console.WriteLine("Processing " + s);

        int delay;
        lock (rng) { delay = rng.Next(10, 1000); }

        await Task.Delay(delay); // Simulate load.

        Console.WriteLine("Completed " + s);

        // Constructing the result is trivial, so it is returned directly instead of
        // being scheduled through Task.Run.
        return new Keyword { Name = s };
    }
}
}