具有限制持续时间和批量消耗的异步生产者/消费者

时间:2016-10-05 04:07:35

标签: c# async-await task-parallel-library producer-consumer tpl-dataflow

我正在尝试构建一个服务,为许多异步客户端提供一个队列,用于发出请求并等待响应。我需要能够对队列处理进行限流:每Y时间段内最多处理X个请求,例如每秒50个Web请求。它用于调用第三方REST服务,而该服务每秒只允许我发出X个请求。

我找到了许多相关的SO问题,它们引导我选择了TPL Dataflow:我使用一个TransformBlock来实现自定义限流,然后使用X个ActionBlock来并行完成任务。Action的实现看起来有点笨拙,所以我想知道是否有更好的方法,让我把Task传入管道,并在其完成时通知调用者。

我想知道是否有更好、更优或更简单的方法来实现我想要的功能?我的实现有什么明显的问题吗?我知道它缺少取消和异常处理,我接下来会补上,非常欢迎你的意见。

我在Stephen Cleary的示例基础上扩展出了我的Dataflow管道,并使用了svick提出的按时间限流的TransformBlock的思路。我想知道我所构建的内容是否可以通过纯SemaphoreSlim的设计更容易地实现;我认为基于时间的最大操作数限制会使事情变得复杂。

这是最新的实现:一个FIFO异步队列,我可以向其传入自定义操作。

/// <summary>
/// Asynchronous producer/consumer pipeline built on TPL Dataflow: a bounded
/// <see cref="BufferBlock{T}"/> queue feeds a throttling block that lets at most
/// <c>MaxPerInterval</c> items through per <c>Interval</c>, which in turn feeds one or
/// more <see cref="ActionBlock{T}"/> consumers.
/// </summary>
/// <typeparam name="T">Type of the queued work items.</typeparam>
public class ThrottledProducerConsumer<T>
{
    private BufferBlock<T> _queue;
    private IPropagatorBlock<T, T> _throttleBlock;
    private List<Task> _consumers;

    /// <summary>
    /// Creates a pass-through block that only forwards an item after acquiring one of
    /// <paramref name="MaxPerInterval"/> semaphore permits; each permit is handed back
    /// <paramref name="Interval"/> after it was taken.
    /// </summary>
    /// <param name="Interval">How long a permit stays taken after an item passes.</param>
    /// <param name="MaxPerInterval">Number of permits, i.e. items allowed per interval.</param>
    /// <returns>The throttling propagator block.</returns>
    private static IPropagatorBlock<T1, T1> CreateThrottleBlock<T1>(TimeSpan Interval, Int32 MaxPerInterval)
    {
        // Capping the maximum count makes an accidental extra Release() fail loudly
        // instead of silently widening the throttle.
        var permits = new SemaphoreSlim(MaxPerInterval, MaxPerInterval);

        return new TransformBlock<T1, T1>(async (x) =>
        {
            await permits.WaitAsync();

            // Deliberately not awaited: the item must flow on immediately while the
            // permit is returned later. A Task.Delay continuation needs no reference
            // kept alive and nothing disposed, unlike a System.Threading.Timer that
            // nobody holds on to.
            var ignored = Task.Delay(Interval).ContinueWith(
                (t) => { permits.Release(); },
                TaskScheduler.Default);

            return x;
        },
        new ExecutionDataflowBlockOptions { BoundedCapacity = 5, MaxDegreeOfParallelism = 10 });
    }

    /// <summary>
    /// Builds the queue -> throttle -> consumer(s) pipeline.
    /// </summary>
    /// <param name="Interval">Length of the throttling window.</param>
    /// <param name="MaxPerInterval">Maximum number of items released per window.</param>
    /// <param name="QueueBoundedMax">Bounded capacity of the input queue.</param>
    /// <param name="ConsumerAction">Action run for every item; when null, the item is printed to the console.</param>
    /// <param name="MaxConsumers">Number of consumer blocks linked to the throttle block.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="MaxPerInterval"/> or <paramref name="MaxConsumers"/> is less than 1.
    /// </exception>
    public ThrottledProducerConsumer(TimeSpan Interval, int MaxPerInterval, Int32 QueueBoundedMax = 5, Action<T> ConsumerAction = null, Int32 MaxConsumers = 1)
    {
        if (MaxPerInterval < 1)
        {
            throw new ArgumentOutOfRangeException(nameof(MaxPerInterval), "At least one item per interval must be allowed.");
        }
        if (MaxConsumers < 1)
        {
            // Without a consumer nothing would ever drain the pipeline.
            throw new ArgumentOutOfRangeException(nameof(MaxConsumers), "At least one consumer is required.");
        }

        var consumerOptions = new ExecutionDataflowBlockOptions { BoundedCapacity = 1, };
        var linkOptions = new DataflowLinkOptions { PropagateCompletion = true, };

        //-- Create the queue
        _queue = new BufferBlock<T>(new DataflowBlockOptions { BoundedCapacity = QueueBoundedMax, });

        //-- Create and link the throttle block
        _throttleBlock = CreateThrottleBlock<T>(Interval, MaxPerInterval);
        _queue.LinkTo(_throttleBlock, linkOptions);

        //-- Create and link the consumer(s) to the throttle block
        var consumerAction = ConsumerAction ?? new Action<T>(ConsumeItem);
        _consumers = new List<Task>();
        for (int i = 0; i < MaxConsumers; i++)
        {
            var consumer = new ActionBlock<T>(consumerAction, consumerOptions);
            _throttleBlock.LinkTo(consumer, linkOptions);
            _consumers.Add(consumer.Completion);
        }

        //-- TODO: Add some cancellation tokens to shut this thing down
    }

    /// <summary>
    /// Default consumer action, just prints the item to the console.
    /// </summary>
    /// <param name="ItemToConsume">The item taken from the pipeline.</param>
    private void ConsumeItem(T ItemToConsume)
    {
        Console.WriteLine($"Consumed {ItemToConsume} at {DateTime.UtcNow}");
    }

    /// <summary>
    /// Sends a single item to the queue; the returned task completes once the send has finished.
    /// </summary>
    /// <param name="ItemToEnqueue">The item to queue.</param>
    public async Task EnqueueAsync(T ItemToEnqueue)
    {
        await this._queue.SendAsync(ItemToEnqueue);
    }

    /// <summary>
    /// Sends the items to the queue one after another, in enumeration order.
    /// </summary>
    /// <param name="ItemsToEnqueue">The items to queue.</param>
    public async Task EnqueueItemsAsync(IEnumerable<T> ItemsToEnqueue)
    {
        foreach (var item in ItemsToEnqueue)
        {
            await this._queue.SendAsync(item);
        }
    }

    /// <summary>
    /// Marks the queue as complete and waits until every consumer block has completed.
    /// </summary>
    public async Task CompleteAsync()
    {
        this._queue.Complete();
        await Task.WhenAll(_consumers);
        Console.WriteLine($"All consumers completed {DateTime.UtcNow}");
    }
}

测试方法

    /// <summary>
    /// A unit of work identified by a URL, paired with the
    /// <see cref="TaskCompletionSource{T}"/> through which its result is reported.
    /// </summary>
    /// <typeparam name="T">Type of the result produced for this item.</typeparam>
    public class WorkItem<T>
    {
        // Completed by whoever processes the item; callers observe tcs.Task.
        public TaskCompletionSource<T> tcs = new TaskCompletionSource<T>();

        // The address this work item stands for.
        public string url;

        /// <summary>Creates a pending work item for the given URL.</summary>
        /// <param name="Url">The address to store in <see cref="url"/>.</param>
        public WorkItem(string Url)
        {
            url = Url;
        }

        /// <summary>Returns the URL text (an empty string when the URL is null).</summary>
        public override string ToString() => $"{url}";
    }

    /// <summary>
    /// Demo driver: pushes 20 work items through a pipeline throttled to 5 items per
    /// 2 seconds and blocks until every item has been enqueued, processed and reported.
    /// </summary>
    public static void TestQueue()
    {
        Console.WriteLine("Created the queue");

        // The pipeline takes an Action<T>, so this lambda is async void: an exception
        // escaping it would take the process down and leave the item's task pending
        // forever. Every failure is therefore routed into the TaskCompletionSource.
        var defaultAction = new Action<WorkItem<String>>(async taskItem =>
        {
            try
            {
                Console.WriteLine($"Consuming: {taskItem.url} {DateTime.UtcNow:mm:ss:ff}");
                //-- Assume calling another async method e.g. await httpClient.DownloadStringTaskAsync(url);
                await Task.Delay(5000);
                taskItem.tcs.SetResult($"{taskItem.url}");
            }
            catch (Exception ex)
            {
                taskItem.tcs.TrySetException(ex);
            }
        });

        var queue = new ThrottledProducerConsumer<WorkItem<String>>(TimeSpan.FromMilliseconds(2000), 5, 2, defaultAction);

        var results = new List<Task>();
        foreach (var no in Enumerable.Range(0, 20))
        {
            var workItem = new WorkItem<String>($"http://someurl{no}.com");
            results.Add(queue.EnqueueAsync(workItem));
            results.Add(workItem.tcs.Task);
            results.Add(workItem.tcs.Task.ContinueWith(response =>
            {
                Console.WriteLine($"Received: {response.Result} {DateTime.UtcNow:mm:ss:ff}");
            }));
        }

        // Blocking is acceptable here only because this is a synchronous console-style
        // test entry point; a failed item surfaces as an AggregateException.
        Task.WhenAll(results).Wait();
        Console.WriteLine("All Work Items Have Been Processed");
    }

1 个答案:

答案 0 :(得分:1)

自从提问以来,我已经基于TPL Dataflow创建了一个ThrottledProducerConsumer类。它经过了几天的测试,其中包括并行的生产者,请求按顺序排队并完成,大约281k个请求没有出现任何问题,但可能仍有我尚未发现的错误。

  1. 我使用 BufferBlock 作为异步队列,这链接到:
  2. TransformBlock ,它提供了我需要的限流和阻塞功能。它与SemaphoreSlim一起使用来控制最大请求数。每个项目通过该块时,它会占用信号量的一个计数,并调度一个任务,在X时长之后将信号量释放一个计数。这样我就得到了一个每个时间段最多X个请求的滑动窗口,这正是我想要的。借助TPL,我还利用了并行性,它连接到:
  3. ActionBlock (s)负责执行我需要的任务。
  4. 这些类是泛型的,因此如果其他人需要类似的东西,它们可能会有用。我还没有编写取消或错误处理,但我想我应该先将问题标记为已回答,以便继续推进。比起把我自己的回答标记为公认答案,我更希望看到一些替代方案和反馈。谢谢你的阅读。

    注意:我从原始实现中移除了Timer,因为它的行为很奇怪,会导致信号量的释放次数超过最大值;我猜测这是某种上下文相关的错误,它发生在我开始运行并发请求时。现在我使用Task.Delay来安排信号量的释放。

    受限制的生产者消费者

    /// <summary>
    /// Asynchronous producer/consumer pipeline built on TPL Dataflow: a bounded
    /// <see cref="BufferBlock{T}"/> queue feeds a throttling block that lets at most
    /// <c>MaxPerInterval</c> items through per <c>Interval</c>, which in turn feeds one or
    /// more <see cref="ActionBlock{T}"/> consumers.
    /// </summary>
    /// <typeparam name="T">Type of the queued work items.</typeparam>
    public class ThrottledProducerConsumer<T>
    {
        private BufferBlock<T> _queue;
        private IPropagatorBlock<T, T> _throttleBlock;
        private List<Task> _consumers;

        /// <summary>
        /// Creates a pass-through block that only forwards an item after acquiring one of
        /// <paramref name="MaxPerInterval"/> semaphore permits; each permit is handed back
        /// <paramref name="Interval"/> after it was taken.
        /// </summary>
        /// <param name="Interval">How long a permit stays taken after an item passes.</param>
        /// <param name="MaxPerInterval">Number of permits, i.e. items allowed per interval.</param>
        /// <param name="BlockBoundedMax">Bounded capacity of the throttle block.</param>
        /// <param name="BlockMaxDegreeOfParallelism">Maximum degree of parallelism of the throttle block.</param>
        /// <returns>The throttling propagator block.</returns>
        private static IPropagatorBlock<T1, T1> CreateThrottleBlock<T1>(TimeSpan Interval,
            Int32 MaxPerInterval, Int32 BlockBoundedMax = 2, Int32 BlockMaxDegreeOfParallelism = 2)
        {
            // The explicit maximum makes an accidental extra Release() throw instead of
            // silently widening the throttle.
            var permits = new SemaphoreSlim(MaxPerInterval, MaxPerInterval);

            return new TransformBlock<T1, T1>(async (x) =>
            {
                await permits.WaitAsync();

                // Deliberately not awaited: the item flows on immediately while the
                // permit is returned once the interval has elapsed.
                var ignored = Task.Delay(Interval).ContinueWith(
                    (t) => { permits.Release(); },
                    TaskScheduler.Default);

                return x;
            },
            //-- Might be better to keep Bounded Capacity in sync with the semaphore
            new ExecutionDataflowBlockOptions
            {
                BoundedCapacity = BlockBoundedMax,
                MaxDegreeOfParallelism = BlockMaxDegreeOfParallelism
            });
        }

        /// <summary>
        /// Builds the queue -> throttle -> consumer(s) pipeline.
        /// </summary>
        /// <param name="Interval">Length of the throttling window.</param>
        /// <param name="MaxPerInterval">Maximum number of items released per window.</param>
        /// <param name="QueueBoundedMax">Bounded capacity of the input queue.</param>
        /// <param name="ConsumerAction">Action run for every item; when null, the item is logged.</param>
        /// <param name="MaxConsumers">Number of consumer blocks linked to the throttle block.</param>
        /// <param name="MaxThrottleBuffer">Bounded capacity of the throttle block.</param>
        /// <param name="MaxDegreeOfParallelism">Maximum degree of parallelism of the throttle block.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="MaxPerInterval"/> or <paramref name="MaxConsumers"/> is less than 1.
        /// </exception>
        public ThrottledProducerConsumer(TimeSpan Interval, int MaxPerInterval,
            Int32 QueueBoundedMax = 5, Action<T> ConsumerAction = null, Int32 MaxConsumers = 1,
            Int32 MaxThrottleBuffer = 20, Int32 MaxDegreeOfParallelism = 10)
        {
            if (MaxPerInterval < 1)
            {
                throw new ArgumentOutOfRangeException(nameof(MaxPerInterval), "At least one item per interval must be allowed.");
            }
            if (MaxConsumers < 1)
            {
                // Without a consumer nothing would ever drain the pipeline.
                throw new ArgumentOutOfRangeException(nameof(MaxConsumers), "At least one consumer is required.");
            }

            //-- Probably best to link MaxPerInterval and MaxThrottleBuffer
            //  and MaxConsumers with MaxDegreeOfParallelism
            var consumerOptions = new ExecutionDataflowBlockOptions { BoundedCapacity = 1, };
            var linkOptions = new DataflowLinkOptions { PropagateCompletion = true, };

            //-- Create the queue
            _queue = new BufferBlock<T>(new DataflowBlockOptions { BoundedCapacity = QueueBoundedMax, });

            //-- Create and link the throttle block. The buffer size and parallelism
            //   requested by the caller are forwarded here; otherwise the throttle
            //   block would always run with CreateThrottleBlock's own defaults.
            _throttleBlock = CreateThrottleBlock<T>(Interval, MaxPerInterval, MaxThrottleBuffer, MaxDegreeOfParallelism);
            _queue.LinkTo(_throttleBlock, linkOptions);

            //-- Create and link the consumer(s) to the throttle block
            var consumerAction = ConsumerAction ?? new Action<T>(ConsumeItem);
            _consumers = new List<Task>();
            for (int i = 0; i < MaxConsumers; i++)
            {
                var consumer = new ActionBlock<T>(consumerAction, consumerOptions);
                _throttleBlock.LinkTo(consumer, linkOptions);
                _consumers.Add(consumer.Completion);
            }

            //-- TODO: Add some cancellation tokens to shut this thing down
        }

        /// <summary>
        /// Default consumer action, just logs the item.
        /// </summary>
        /// <param name="ItemToConsume">The item taken from the pipeline.</param>
        private void ConsumeItem(T ItemToConsume)
        {
            Log($"Consumed {ItemToConsume} at {DateTime.UtcNow}");
        }

        /// <summary>
        /// Sends a single item to the queue; the returned task completes once the send has finished.
        /// </summary>
        /// <param name="ItemToEnqueue">The item to queue.</param>
        public async Task EnqueueAsync(T ItemToEnqueue)
        {
            await this._queue.SendAsync(ItemToEnqueue);
        }

        /// <summary>
        /// Sends the items to the queue one after another, in enumeration order.
        /// </summary>
        /// <param name="ItemsToEnqueue">The items to queue.</param>
        public async Task EnqueueItemsAsync(IEnumerable<T> ItemsToEnqueue)
        {
            foreach (var item in ItemsToEnqueue)
            {
                await this._queue.SendAsync(item);
            }
        }

        /// <summary>
        /// Marks the queue as complete and waits until every consumer block has completed.
        /// </summary>
        public async Task CompleteAsync()
        {
            this._queue.Complete();
            await Task.WhenAll(_consumers);
            Console.WriteLine($"All consumers completed {DateTime.UtcNow}");
        }

        /// <summary>
        /// Writes the message to both the trace listeners and the console.
        /// </summary>
        /// <param name="messageToLog">Text to emit.</param>
        private static void Log(String messageToLog)
        {
            System.Diagnostics.Trace.WriteLine(messageToLog);
            Console.WriteLine(messageToLog);
        }

    }
    

    - 示例用法 -

    通用WorkItem

    /// <summary>
    /// A unit of work carrying its input data and a task that completes with the
    /// output once the item has been processed.
    /// </summary>
    /// <typeparam name="Toutput">Type of the result produced for this item.</typeparam>
    /// <typeparam name="Tinput">Type of the input data carried by this item.</typeparam>
    public class WorkItem<Toutput,Tinput>
    {
        private TaskCompletionSource<Toutput> _tcs;

        /// <summary>Task that completes when <see cref="Complete"/> or <see cref="Failed"/> is called.</summary>
        public Task<Toutput> Task { get { return _tcs.Task; } }

        /// <summary>The input supplied at construction.</summary>
        public Tinput InputData { get; private set; }

        /// <summary>The result passed to <see cref="Complete"/>; the default value until then.</summary>
        public Toutput OutputData { get; private set; }

        /// <summary>Creates a pending work item for the given input.</summary>
        /// <param name="inputData">Data to process.</param>
        public WorkItem(Tinput inputData)
        {
            _tcs = new TaskCompletionSource<Toutput>();
            InputData = inputData;
        }

        /// <summary>Records the result and completes <see cref="Task"/> with it.</summary>
        /// <param name="result">The produced output.</param>
        public void Complete(Toutput result)
        {
            // Store the result before completing the task so continuations that run
            // as part of SetResult already see OutputData populated.
            OutputData = result;
            _tcs.SetResult(result);
        }

        /// <summary>Faults <see cref="Task"/> with the given exception.</summary>
        /// <param name="ex">The failure to report.</param>
        public void Failed(Exception ex)
        {
            _tcs.SetException(ex);
        }

        /// <summary>Returns the input's string form, or an empty string when the input is null.</summary>
        public override string ToString()
        {
            return InputData == null ? string.Empty : InputData.ToString();
        }
    }
    

    创建在管道中执行的操作块

        /// <summary>
        /// Builds the consumer action for the pipeline: it simulates an asynchronous
        /// lookup for the item's coordinates and completes the work item with a
        /// <c>Location</c>, or marks the item as failed if anything throws.
        /// </summary>
        /// <returns>The action to pass to the pipeline as its consumer.</returns>
        private Action<WorkItem<Location,PointToLocation>> CreateProcessingAction()
        {
            // The pipeline takes an Action<T>, so this lambda is async void: an exception
            // escaping it cannot be observed by anyone. Failures are therefore reported
            // through the work item itself.
            return new Action<WorkItem<Location,PointToLocation>>(async taskItem => {
                try
                {
                    var sw = Stopwatch.StartNew();
                    var inputData = taskItem.InputData;

                    //-- Assume calling another async method e.g. await httpClient.DownloadStringTaskAsync(url);
                    await Task.Delay(500);
                    sw.Stop();

                    Location outData = new Location()
                    {
                        Latitude = inputData.Latitude,
                        Longitude = inputData.Longitude,
                        StreetAddress = $"Consumed: {inputData.Latitude},{inputData.Longitude} Duration(ms): {sw.ElapsedMilliseconds}"
                    };
                    taskItem.Complete(outData);
                }
                catch (Exception ex)
                {
                    taskItem.Failed(ex);
                }
            });

        }
    

    测试方法 您需要为PointToLocation和Location提供自己的实现。只是一个例子,说明如何在自己的课程中使用它。

        // First value of the next batch of generated work items; advanced by nextRange after each click.
        int startRange = 0;
        // Number of work items generated per click (used as the count for Enumerable.Range).
        int nextRange = 1000;
        // The pipeline under test; created on the first click and reused on later clicks.
        ThrottledProducerConsumer<WorkItem<Location,PointToLocation>> tpc;
        /// <summary>
        /// Click handler that creates the pipeline on first use, generates the next batch
        /// of work items and enqueues them.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private async void cmdTestPipeline_Click(object sender, EventArgs e)
        {
            Log($"Pipeline test started {DateTime.Now:HH:mm:ss:ff}");

            if(tpc == null)
            {
                tpc = new ThrottledProducerConsumer<WorkItem<Location, PointToLocation>>(
                    //1010, 2, 20000,
                    TimeSpan.FromMilliseconds(1010), 45, 100000,
                    CreateProcessingAction(),
                    2,45,10);
            }

            var workItems = new List<WorkItem<Models.Location, PointToLocation>>();
            foreach (var i in Enumerable.Range(startRange, nextRange))
            {
                var ptToLoc = new PointToLocation() { Latitude = i + 101, Longitude = i + 100 };
                var wrkItem = new WorkItem<Location, PointToLocation>(ptToLoc);
                workItems.Add(wrkItem);

                // Only successful items are reported: reading t.Result on a faulted
                // task would throw inside a continuation nobody observes.
                wrkItem.Task.ContinueWith(t =>
                {
                    var loc = t.Result;
                    string line = $"[Simulated:{DateTime.Now:HH:mm:ss:ff}] - {loc.StreetAddress}";
                    // Hook for showing the result, e.g. Log(line). Pass
                    // TaskScheduler.FromCurrentSynchronizationContext() as the scheduler
                    // when the continuation needs to touch UI controls.
                }, TaskContinuationOptions.OnlyOnRanToCompletion);
            }

            // Advance before awaiting so a second click during the enqueue starts at the next range.
            startRange += nextRange;

            try
            {
                // Awaited (not fire-and-forget) so enqueue failures are observed and the
                // "completed" message is only logged once every item is actually queued.
                await tpc.EnqueueItemsAsync(workItems);
            }
            catch (Exception ex)
            {
                // async void: an exception leaving this handler could not be caught by any caller.
                Log($"Pipeline enqueue failed: {ex}");
            }

            Log($"Pipeline test completed {DateTime.Now:HH:mm:ss:ff}");
        }