使用TPL的PC队列?

时间:2014-09-04 02:25:05

标签: .net queue producer-consumer tpl-dataflow blockingcollection

原始

如何利用TPL和/或并发集合在C#中创建Producer-Consumer Queue?我使用的是.NET 4.5 +。

这是我的第一次尝试:

public class SampleFileProcessor
{
    private readonly BlockingCollection<string> _queue = 
        new BlockingCollection<string>();

    public void ProcessFile(string fileName)
    {
        // Imagine this method is inside a WCF service and gets called from
        // thousands of different clients, and very frequently from each one.
        // This doesn't feel like correct design. I'm firing off a separate 
        // threadpool task to process a single item every time?
        // Is the TPL smart enough to re-use an existing thread that is already 
        // running the ConsumeQueue() method? Is this efficient?

        _queue.Add(fileName);         
        Task.Run(() => ConsumeQueue());
    }

    private void ConsumeQueue()
    {
        foreach (var item in _queue.GetConsumingEnumerable())
        {
            // fake a time-consuming process.
            System.Threading.Thread.Sleep(1000);
        }
    }
}

编辑2014年9月5日:基于反馈的第2版。

public class SampleFileProcessor
{
    private int _count;
    private readonly BlockingCollection<string> _queue =
        new BlockingCollection<string>();        

    public void ProcessFile(string fileName)
    {
        _queue.Add(fileName);
        Interlocked.Increment(ref _count);
        Task.Run(() => ConsumeQueue());
    }

    private void ConsumeQueue()
    {
        while (_count > 0)
        {
            string item = null;
            if (_queue.TryTake(out item, 1000))
            {
                // fake a time-consuming process.
                Thread.Sleep(1000);
                Interlocked.Decrement(ref _count);
            }
        }
    }
}

编辑2014年9月7日:问题解决了。

我发现Microsoft Task Parallel Library Dataflow类已经完全封装了我想要的内容以及更多内容。 NuGet包:Install-Package Microsoft.Tpl.Dataflow

为了让社区受益,我将分享一些测试代码。 (也在https://dotnetfiddle.net/WbwUqz

using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using System.Threading.Tasks.Dataflow;

namespace TPLDataFlowTest
{
    class Program
    {
        static void Main(string[] args)
        {  
            // Task Parallel Library Dataflow:
            // http://msdn.microsoft.com/en-us/library/hh228603(v=vs.110).aspx
            // Install-Package Microsoft.Tpl.Dataflow

            // Other resources on the web:
            // http://channel9.msdn.com/Shows/Going+Deep/Stephen-Toub-Inside-TPL-Dataflow
            // http://social.msdn.microsoft.com/Forums/en-US/home?forum=tpldataflow

            var tasks = new List<Task>();

            Console.WriteLine("\n\nBasicPCQueue =>");
            tasks.Add(TestBasicPCQueue());

            Console.WriteLine("\n\nBatchingPCQueue =>");
            tasks.Add(TestBatchingPCQueue());

            Task.WaitAll(tasks.ToArray());
            Console.WriteLine("\n\nPress Enter to quit...");
            Console.ReadLine();
        }

        public static Task TestBasicPCQueue()
        {
            var action = new ActionBlock<string>(
                async s =>
                {
                    Console.WriteLine("{0}...", s);
                    await Task.Delay(2000);
                    Console.WriteLine("{0}", s);
                },
                new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = 2 }
            );

            Parallel.For(0, 5,
                i =>
                {
                    string item = "BasicPCQ " + i;
                    action.Post(item);
                    Console.WriteLine("BasicPCQ posted " + i);
                }
            );

            action.Complete();
            return action.Completion;
        }

        public static Task TestBatchingPCQueue()
        {
            int count = 5;
            int remaining = count;
            var taskSource = new TaskCompletionSource<bool>();

            var pcq = new PCQueue<string>(
                action: array =>
                {
                    var s = string.Join(",", array);
                    Console.WriteLine("PCQ [{0}] waiting 2000 ms ...", s);
                    Task.Delay(2000).Wait();
                    Console.WriteLine("PCQ [{0}] done", s);
                    remaining -= array.Length;
                    if (remaining == 0)
                    {
                        taskSource.SetResult(true);
                    }
                },
                batchSize: 2,
                maxDegreeOfParallism: 1,
                batchFlushAfterMilliseconds: 3000);

            Parallel.For(0, count, i =>
            {
                pcq.Add(i.ToString());
                Console.WriteLine("PCQ posted " + i);
            });

            return taskSource.Task;
        }

    }

    public class PCQueue<T> where T : class
    {
        private readonly BatchBlock<T> _batchBlock;
        private readonly ActionBlock<T[]> _actionBlock;
        private readonly System.Timers.Timer _batchTriggerTimer;
        private int _count;

        public PCQueue(Action<T[]> action, int batchSize, int maxDegreeOfParallism, double batchFlushAfterMilliseconds)
        {
            _batchBlock = new BatchBlock<T>(batchSize);
            _actionBlock = new ActionBlock<T[]>(
                array =>
                {
                    _batchTriggerTimer.Enabled = false;
                    action(array);
                    Interlocked.Add(ref _count, -array.Length); // decrement _count.
                    _batchTriggerTimer.Enabled = (_count > 0 && _count < _batchBlock.BatchSize);
                },
                new ExecutionDataflowBlockOptions { MaxDegreeOfParallelism = maxDegreeOfParallism }
            );

            _batchBlock.LinkTo(_actionBlock, new DataflowLinkOptions { PropagateCompletion = true });

            // This is a watchdog timer to flush the batchBlock periodically if the 
            // number of items in the batch are below the threshold for 
            // auto-triggering. I wish this were built into the BatchBlock class.
            _batchTriggerTimer = new System.Timers.Timer(batchFlushAfterMilliseconds);
            _batchTriggerTimer.Elapsed += delegate
            {
                _batchTriggerTimer.Enabled = false;
                _batchBlock.TriggerBatch();
            };
        }

        public void Add(T item)
        {
            _batchBlock.Post(item);
            Interlocked.Increment(ref _count);
            _batchTriggerTimer.Enabled = (_count > 0 && _count < _batchBlock.BatchSize);
        }
    }
}

1 个答案:

答案 0 :(得分:2)

每{{}}}之后你都不应该开始消费。相反,启动一个或多个(作为您的案例的效率要求)长期运行的消费者(请注意,您的消费者Add不会完成,直到您通过foreach结束收集,不< / strong>当CompleteAdding暂时为空时。) 然后,只在生产者中BlockingCollection