我有一个程序如下
/// <summary>
/// Producer/consumer demo built on a shared BlockingCollection: one producer task
/// enqueues 15 strings and TaskCount consumer tasks drain the queue, with both
/// sides logging each item to the console and to a text file.
/// </summary>
class Program
{
// Number of consumer tasks that Main starts.
public static int TaskCount { get; set; }
// Queue shared by the producer and all consumers; backed by a ConcurrentQueue.
public static BlockingCollection<string> queue = new BlockingCollection<string>(new ConcurrentQueue<string>());
/// <summary>
/// Starts one producer task and TaskCount consumer tasks, then blocks on a key press.
/// </summary>
static void Main(string[] args)
{
TaskCount = 3;
Task.Factory.StartNew(() => Producer());
// The returned tasks are not stored or awaited, so nothing here observes their outcome.
for (int i = 0; i < TaskCount; i++)
Task.Factory.StartNew(() => Consumer());
// Keeps the process alive while the background tasks run.
Console.ReadKey();
}
/// <summary>
/// Adds "Item: 1" .. "Item: 15" to the queue, logging each addition to the console
/// and to C:\pcadder.txt, then marks the queue as complete for adding.
/// </summary>
private static void Producer()
{
// A single writer is opened once and kept for the whole loop.
using (StreamWriter sw = File.AppendText(@"C:\pcadder.txt"))
{
for (int i = 0; i < 15; i++)
{
queue.Add("Item: " + (i+1).ToString());
// {2} is the running number prefix, {0} the item number, {1} the timestamp.
var message = string.Format("{2}.Item added: Item {0} at {1}", (i+1).ToString(), DateTime.Now.ToString("yyyy/MM/dd hh:mm:ss.ffffff"),i+1);
Console.WriteLine(message);
sw.WriteLine(message);
}
// Signals that no more items will be added, which lets the consumers' foreach loops end.
queue.CompleteAdding();
}
}
/// <summary>
/// Takes items from the queue until adding is completed and the queue is empty,
/// logging each taken item to the console and to C:\pctaker.txt.
/// </summary>
private static void Consumer()
{
// Local to this call, so the numbering prefix restarts at 1 for every consumer.
int count = 1;
foreach (var item in queue.GetConsumingEnumerable())
{
// {3} is this consumer's own counter, {0} the item, {1} the timestamp, {2} the thread id.
var message = string.Format("{3}.Item taken: {0} at {1} by thread {2}.", item, DateTime.Now.ToString("yyyy/MM/dd hh:mm:ss.ffffff"),
Thread.CurrentThread.ManagedThreadId,count);
Console.WriteLine(message);
// NOTE(review): the file is reopened for every item, and several consumers run this
// line concurrently against the same path. An open that fails would throw inside the
// task, where nothing observes it — presumably why a line goes missing from the log.
using (StreamWriter sw = File.AppendText(@"C:\pctaker.txt"))
sw.WriteLine(message);
count += 1;
}
}
}
1.Item added: Item 1 at 2017.07.06 09:58:49.784734
2.Item added: Item 2 at 2017.07.06 09:58:49.784734
3.Item added: Item 3 at 2017.07.06 09:58:49.784734
4.Item added: Item 4 at 2017.07.06 09:58:49.784734
5.Item added: Item 5 at 2017.07.06 09:58:49.784734
6.Item added: Item 6 at 2017.07.06 09:58:49.784734
7.Item added: Item 7 at 2017.07.06 09:58:49.784734
8.Item added: Item 8 at 2017.07.06 09:58:49.784734
9.Item added: Item 9 at 2017.07.06 09:58:49.784734
10.Item added: Item 10 at 2017.07.06 09:58:49.784734
11.Item added: Item 11 at 2017.07.06 09:58:49.784734
12.Item added: Item 12 at 2017.07.06 09:58:49.784734
13.Item added: Item 13 at 2017.07.06 09:58:49.784734
14.Item added: Item 14 at 2017.07.06 09:58:49.784734
15.Item added: Item 15 at 2017.07.06 09:58:49.784734
1.Item taken: Item: 3 at 2017.07.06 09:58:49.784734 by thread 7.
1.Item taken: Item: 2 at 2017.07.06 09:58:49.784734 by thread 4.
1.Item taken: Item: 1 at 2017.07.06 09:58:49.784734 by thread 5.
2.Item taken: Item: 5 at 2017.07.06 09:58:49.784734 by thread 4.
2.Item taken: Item: 4 at 2017.07.06 09:58:49.784734 by thread 7.
2.Item taken: Item: 6 at 2017.07.06 09:58:49.784734 by thread 5.
3.Item taken: Item: 7 at 2017.07.06 09:58:49.784734 by thread 4.
3.Item taken: Item: 8 at 2017.07.06 09:58:49.784734 by thread 7.
3.Item taken: Item: 9 at 2017.07.06 09:58:49.784734 by thread 5.
4.Item taken: Item: 11 at 2017.07.06 09:58:49.784734 by thread 7.
4.Item taken: Item: 12 at 2017.07.06 09:58:49.784734 by thread 5.
5.Item taken: Item: 13 at 2017.07.06 09:58:49.784734 by thread 7.
5.Item taken: Item: 14 at 2017.07.06 09:58:49.784734 by thread 5.
6.Item taken: Item: 15 at 2017.07.06 09:58:49.784734 by thread 7.
几乎每次运行程序后,我都会在消费者日志中丢失一个项目。(此处,Item 10
缺失)。我无法理解为什么会这样。
答案 0 :(得分:6)
下面
using (StreamWriter sw = File.AppendText(@"C:\pctaker.txt"))
sw.WriteLine(message);
您正在从多个线程快速地写入同一个文件。这不是一个好主意,这段代码实际上会引发异常。您之所以没有注意到它,是因为您没有处理任何异常,而且异常发生在后台线程中,因此不会使您的应用程序崩溃。这就解释了为什么日志中会缺少项目。您可以改为让所有线程共用同一个写入器来写入该文件,例如:
// Create the writer once, outside `Consumer`, and wrap it with TextWriter.Synchronized
// so that every consumer task can share the same writer safely.
using (var taker = TextWriter.Synchronized(File.AppendText(@"pctaker.txt")))
{
    TaskCount = 3;
    // Keep the task handles so completion and failures can be observed.
    var producer = Task.Factory.StartNew(() => Producer());
    var consumers = new Task[TaskCount];
    for (int i = 0; i < TaskCount; i++)
    {
        // Pass the shared writer to each consumer.
        consumers[i] = Task.Factory.StartNew(() => Consumer(taker));
    }
    // Wait for all work to finish before the using block disposes the writer;
    // otherwise an early key press could close it while consumers are still writing.
    // Waiting also rethrows (as AggregateException) anything a task threw, instead of
    // letting the failure pass silently. The producer is awaited first because the
    // consumers only finish after it has called CompleteAdding.
    producer.Wait();
    Task.WaitAll(consumers);
    Console.ReadKey();
}
/// <summary>
/// Drains the shared queue until adding is completed and the queue is empty, logging
/// every taken item to the console and to the supplied writer.
/// </summary>
/// <param name="writer">Writer that receives one log line per item; flushed after each line.</param>
private static void Consumer(TextWriter writer)
{
    // Per-consumer running number used as the line prefix.
    int taken = 1;
    foreach (var item in queue.GetConsumingEnumerable())
    {
        string timestamp = DateTime.Now.ToString("yyyy/MM/dd hh:mm:ss.ffffff");
        int threadId = Thread.CurrentThread.ManagedThreadId;
        string message = string.Format("{3}.Item taken: {0} at {1} by thread {2}.", item, timestamp, threadId, taken);

        Console.WriteLine(message);
        writer.WriteLine(message);
        // Flush immediately so the line reaches the file even if the process ends abruptly.
        writer.Flush();

        taken++;
    }
}
或者只需在写文件的代码外面加一个lock。
至于第二个问题 - 消费者仍然按FIFO顺序提取物品,但由于您有多个消费者 - 处理顺序当然无法保证,因为所有消费者并行处理物品。消费者A拉动项目1,消费者B同时拉动项目2。消费者A处理项目1需要100ms,消费者B处理项目2需要10ms。结果 - 项目2在项目1之前被处理(即写入您的日志)。
答案 1 :(得分:1)
如果要确保并行处理后的输出顺序与输入顺序相同,并且仍想使用BlockingCollection,则必须借助优先级队列之类的结构。
但是,如果您准备使用更现代的DataFlow
library(任务并行库的一部分),则有一种更简单的方法。
这是一个示例程序。请注意,它使用了await,但使用DataFlow并不一定需要await。队列中的项类型为int,但您可以指定任何类型 - 这里用int只是因为它便于演示。
关于此示例程序需要注意的重要事项是输入项由多个线程并行处理,但最终输出仍然与输入的顺序相同。
如果你查看输出中的“Returning X from thread Y”,你会发现X并不总是与排队的顺序相同。工作线程可以以非输入顺序返回数据。
但是如果你查看输出中的“Outputting X”,你会看到X与排队的顺序相同(单调递增)。
输出队列确保输出的顺序正确。
using System;
using System.Threading;
using System.Threading.Tasks;
using System.Threading.Tasks.Dataflow;
namespace ConsoleApp1
{
    /// <summary>
    /// TPL Dataflow demo: items are processed in parallel by a TransformBlock and then
    /// written out by a single-threaded ActionBlock linked to it.
    /// </summary>
    public class Program
    {
        /// <summary>
        /// Builds the two-block pipeline, feeds it, and waits until every item has been output.
        /// </summary>
        static void Main()
        {
            var inQueue = new TransformBlock<int, int>(item => process(item), processBlockOptions());
            var outQueue = new ActionBlock<int>(item => output(item), outputBlockOptions());

            // PropagateCompletion makes outQueue complete once inQueue has completed
            // and handed over all of its results.
            inQueue.LinkTo(outQueue, new DataflowLinkOptions {PropagateCompletion = true});

            var task = queueData(inQueue);

            Console.WriteLine("Waiting for task to complete in thread " + Thread.CurrentThread.ManagedThreadId);
            task.Wait();

            // inQueue completing only means its results were passed on; wait for the
            // output block too, so the process cannot exit before the last item is written.
            outQueue.Completion.Wait();

            Console.WriteLine("Completed.");
        }

        /// <summary>
        /// Enqueues all input, signals that no more input will follow, and waits for the
        /// processing block to finish.
        /// </summary>
        /// <param name="executor">The processing block that receives the input items.</param>
        static async Task queueData(TransformBlock<int, int> executor)
        {
            await enqueue(executor);

            Console.WriteLine("Indicating that no more data will be queued.");
            executor.Complete(); // Indicate that no more items will be queued.

            Console.WriteLine("Waiting for queue to empty.");
            await executor.Completion; // Wait for executor queue to empty.
        }

        /// <summary>
        /// Sends the integers 0..99 to the block, one at a time.
        /// </summary>
        /// <param name="executor">The block to send the items to.</param>
        static async Task enqueue(TransformBlock<int, int> executor)
        {
            for (int i = 0; i < 100; ++i)
            {
                Console.WriteLine("Queuing data " + i);

                int v = i;
                // SendAsync is awaited, so the loop pauses while the block cannot accept more input.
                await executor.SendAsync(v);
            }
        }

        /// <summary>
        /// Processes a value by adding 1000 to it, after a randomized delay that simulates work.
        /// </summary>
        /// <param name="value">The input item.</param>
        /// <returns>The input plus 1000.</returns>
        static int process(int value)
        {
            Console.WriteLine($"Thread {Thread.CurrentThread.ManagedThreadId} is processing item {value}");

            value += 1000;
            Thread.Sleep(150+nextRand()); // Simulate work.

            Console.WriteLine($"Returning {value} from thread {Thread.CurrentThread.ManagedThreadId}");

            return value;
        }

        /// <summary>
        /// Writes a processed value to the console.
        /// </summary>
        /// <param name="value">The processed item.</param>
        static void output(int value)
        {
            Console.WriteLine($"Outputting {value}");
        }

        /// <summary>
        /// Options for the processing block: up to 4 items in parallel, at most 8 buffered.
        /// </summary>
        static ExecutionDataflowBlockOptions processBlockOptions()
        {
            return new ExecutionDataflowBlockOptions
            {
                MaxDegreeOfParallelism = 4,
                BoundedCapacity = 8
            };
        }

        /// <summary>
        /// Options for the output block: one item at a time, at most 1 buffered.
        /// </summary>
        static ExecutionDataflowBlockOptions outputBlockOptions()
        {
            return new ExecutionDataflowBlockOptions
            {
                MaxDegreeOfParallelism = 1,
                BoundedCapacity = 1
            };
        }

        /// <summary>
        /// Returns a random integer in [0, 250); the shared Random is guarded by a lock
        /// because process() calls this from several threads.
        /// </summary>
        static int nextRand()
        {
            lock (rngLock)
            {
                return rng.Next(250);
            }
        }

        static Random rng = new Random();
        static object rngLock = new object();
    }
}
调整processBlockOptions()返回的MaxDegreeOfParallelism和BoundedCapacity的值,观察结果的变化,这很有趣。
例如,请使用MaxDegreeOfParallelism = 8和BoundedCapacity = 16进行尝试。
[编辑]回答您关于“该项目如何处理?”的问题 - 我怀疑这是因为您的输出日志记录不是线程安全的(参见Evk的出色回答)。