懒惰的单线程工作者

时间:2013-11-30 12:16:53

标签: .net multithreading task-parallel-library

我有一个线程,它以小块的形式读取数据并将其放入 ImmutableList<T>。我需要把这个列表转储到文件中。最初我在每次更新时都同步转储整个列表,但随着列表不断增长,文件大小现已接近 200 MB,同步写入文件耗时过长。于是我把文件写入改成了异步,目前的代码是:

// Current immutable snapshot of all items. Insert replaces it while holding _syncObj;
// SavingThread reads it to decide what to write and whether anything changed since.
public ImmutableList<T> Items { get; private set; }

// Appends the given items, re-sorts the list using _sortOrder, publishes the result
// through Items and then signals the saving thread. Everything happens under _syncObj.
public void Insert(IEnumerable<T> items)
{
    lock (_syncObj)
    {
        var appended = Items.AddRange(items);
        Items = appended.Sort(_sortOrder);
        QueueSaving();
    }
}

// Body of the dedicated saving thread: saves the current snapshot, then sleeps on
// _syncObj until it is pulsed -- unless Items already changed while the save was running,
// in which case it loops again immediately. Never returns.
void SavingThread()
{
    while (true)
    {
        var saved = Items;
        SaveItems(saved);

        lock (_syncObj)
        {
            // A different list instance means an update arrived during the save.
            if (saved != Items)
                continue;

            Monitor.Wait(_syncObj);
        }
    }
}

// Wakes the saving thread if it is waiting on _syncObj.
void QueueSaving()
{
    // Monitor.Pulse must be called while the monitor is held.
    lock (_syncObj)
    {
        Monitor.Pulse(_syncObj);
    }
}

也就是说,如果自上次写入以来有多次更新,写入线程只保存最新的版本。显然,这样我就必须有一个专门用于保存的线程,它在没有更新时处于休眠状态。

要避免使用专用的写入线程,最简洁的写法是什么?也就是说,让 QueueSaving 启动的写入任务一直运行到追上列表的最新更改后结束,然后在下一次更改时再重新启动它?

4 个答案:

答案 0 :(得分:3)

正如我从评论中理解的那样,您只是希望将逻辑转换为异步代码。下面是如何在没有明确的单独线程的情况下完成此操作(除了在整个过程中使用Task.Run之外)。

using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;

namespace ConsoleApplication
{
    /// <summary>
    /// Demo worker: repeatedly obtains items asynchronously, keeps them in an in-memory list
    /// and rewrites the whole list to a temp file each time SAVE_AFTER new items have
    /// accumulated. Saves are chained so that at most one write runs at a time.
    /// </summary>
    public class Worker
    {
        class Item 
        {
            public string Data { get; set; }
        }

        // number of new items that must accumulate before the next snapshot is saved
        const int SAVE_AFTER = 2;

        string _fileName;
        List<Item> _items;
        int _savedItemsCount = 0;

        CancellationToken _token;
        Task _processTask;

        // the most recently started save; every new save awaits it before writing
        Task _pendingSaveTask = null;

        // get next item (simulated with a 500 ms delay)
        async Task<Item> GetNextItemAsync()
        {
            await Task.Delay(500); // delay for testing
            return new Item { Data = "Item from " + DateTime.Now.ToString() };
        }

        // write the snapshot to the file, replacing its previous content
        async Task SaveItemsAsync(Item[] items)
        {
            // _pendingSaveTask is still the *previous* save here: the caller assigns the
            // task returned by this call only after this synchronous part has run.
            if (_pendingSaveTask != null)
                await _pendingSaveTask; // await the previous save

            // StringBuilder keeps this linear; repeated string concatenation is quadratic
            // in the total text length.
            var builder = new System.Text.StringBuilder();
            foreach (var item in items)
                builder.Append(item.Data).Append(Environment.NewLine);
            var text = builder.ToString();

            using (var writer = new System.IO.StreamWriter(_fileName, append: false))
            {
                await writer.WriteAsync(text);
            }
        }

        // main process: loops until the token is cancelled
        async Task ProcessAsync()
        {
            while (true)
            {
                _token.ThrowIfCancellationRequested();

                // start getting the next item
                var getNextItemTask = GetNextItemAsync();

                // save the snapshot if needed (runs concurrently with the fetch above)
                if (_items.Count >= _savedItemsCount + SAVE_AFTER)
                {
                    var snapshot = _items.ToArray();
                    _savedItemsCount = snapshot.Length;
                    _pendingSaveTask = SaveItemsAsync(snapshot);
                }

                // await the next item
                var item = await getNextItemTask;
                _items.Add(item);
            }
        }

        // start: remembers the token, picks a temp file and launches the processing loop
        public void Start(CancellationToken token)
        {
            _token = token;
            _fileName = System.IO.Path.GetTempFileName();
            _items = new List<Item>();

            _processTask = Task.Run(new Func<Task>(ProcessAsync), _token);
        }

        // stop: blocks until the processing loop and the last save have finished.
        // The caller is expected to cancel the token passed to Start first; otherwise the
        // loop never ends and this call does not return.
        public void Stop()
        {
            if (_processTask == null)
                return; // Start was never called, nothing to wait for

            try
            {
                _processTask.Wait(); // wait for the task to complete
            }
            catch (Exception ex)
            {
                // rethrow if anything but OperationCanceledException
                if (!(ex is OperationCanceledException))
                {
                    var aggEx = ex as AggregateException;
                    if (aggEx == null || !(aggEx.InnerException is OperationCanceledException))
                        throw;
                }
            }

            // Wait for the save only after the loop has ended: while the loop is running it
            // can still replace _pendingSaveTask with a newer save, which would otherwise
            // be left unobserved.
            if (_pendingSaveTask != null)
                _pendingSaveTask.Wait();
        }
    }

    // Console driver: runs the worker for ten seconds, then cancels it and waits for it to stop.
    class Program
    {
        public static void Main()
        {
            // CancellationTokenSource is IDisposable; release it once the worker has stopped.
            using (var cts = new CancellationTokenSource())
            {
                var worker = new Worker();

                Console.WriteLine("Start process");
                worker.Start(cts.Token);

                Thread.Sleep(10000);

                Console.WriteLine("Stop process");
                cts.Cancel();
                worker.Stop();
            }

            Console.WriteLine("Press Enter to exit...");
            Console.ReadLine();
        }
    }
}

请注意,如果新项目(GetNextItemAsync)到达的速度快于 SaveItemsAsync 保存上一个快照的速度,此实现可能会导致待处理的 SaveItemsAsync 调用越积越多。如果这是个问题,您可以把 SaveItemsAsync 任务限制为只有一个待处理实例,并使用 BlockingCollection 对新快照进行排队来解决。

[更新] 这是一个稍加改进的版本:当更新速度快于保存速度时,它可以消除冗余写入。它不使用 BlockingCollection,而是在 SaveItemsAsync 中加入了一些额外的取消逻辑。这是一个可以直接运行的控制台应用程序,您可以运行它来观察实际行为。可以试着连续多次调用 _saveTask = SaveItemsAsync(snapshot)。

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;

namespace ConsoleApplication
{
    /// <summary>
    /// Demo worker: repeatedly obtains items asynchronously, keeps them in an in-memory list
    /// and rewrites the whole list to a temp file each time SAVE_AFTER new items have
    /// accumulated. Starting a new save cancels the previous one, so redundant writes of
    /// outdated snapshots are dropped.
    /// </summary>
    public class Worker
    {
        class Item 
        {
            public string Data { get; set; }
        }

        // number of new items that must accumulate before the next snapshot is saved
        const int SAVE_AFTER = 2;

        string _fileName;
        List<Item> _items;
        int _savedItemsCount = 0;

        CancellationToken _token;
        Task _processTask;

        // the most recently started save and the source used to cancel it
        Task _saveTask;
        CancellationTokenSource _saveTaskCts;

        // get next item (simulated with a 500 ms delay)
        async Task<Item> GetNextItemAsync()
        {
            Console.WriteLine("Enter GetNextItemAsync, thread: " + System.Threading.Thread.CurrentThread.ManagedThreadId);

            await Task.Delay(500); // delay for testing
            return new Item { Data = "Item from " + DateTime.Now.ToString() };
        }

        // save items
        async Task SaveItemsAsync(Item[] items)
        {
            Console.WriteLine("Enter SaveItemsAsync, thread: " + System.Threading.Thread.CurrentThread.ManagedThreadId);

            // avoid multiple pending SaveItemsAsync tasks: take over from the previous save
            var oldSaveTaskCts = _saveTaskCts;
            var oldSaveTask = _saveTask;

            var thisSaveTaskCts = CancellationTokenSource.CreateLinkedTokenSource(_token);

            _saveTaskCts = thisSaveTaskCts;
            _saveTask = null;

            // cancel the previous pending SaveItemsAsync, if any
            if (oldSaveTaskCts != null) 
            {
                oldSaveTaskCts.Cancel();
                if (oldSaveTask != null)
                {
                    try
                    {
                        await oldSaveTask.WaitObservingCancellationAsync();
                    }
                    finally
                    {
                        // The previous save has finished, so nothing uses its source any
                        // more. Disposing a linked source removes its registration from
                        // _token; without this, one registration leaks per save.
                        oldSaveTaskCts.Dispose();
                    }
                }
            }

            // another SaveItemsAsync call should lead to cancelling this one
            thisSaveTaskCts.Token.ThrowIfCancellationRequested();

            // execute the save logic on a pool thread, 
            // Task.Run automatically unwraps the nested Task<Task>
            await Task.Run(async () => 
            {
                // do the CPU-bound work: create textual representation of data.
                // StringBuilder keeps this linear; repeated string concatenation is
                // quadratic in the total text length.
                var builder = new System.Text.StringBuilder();
                foreach (var item in items)
                    builder.Append(item.Data).Append(Environment.NewLine);
                var text = builder.ToString();

                // write asynchronously
                Console.WriteLine("Write, thread: " + System.Threading.Thread.CurrentThread.ManagedThreadId);

                // StreamWriter doesn't support cancellation, so do it in two stages with MemoryStream
                using (var memoryStream = new MemoryStream())
                {
                    // write to a memory stream first
                    using (var writer = new StreamWriter(
                        memoryStream,
                        encoding: System.Text.Encoding.UTF8,
                        bufferSize: Environment.SystemPageSize,
                        leaveOpen: true))
                    {
                        await writer.WriteAsync(text);
                    }

                    thisSaveTaskCts.Token.ThrowIfCancellationRequested();

                    // copy the memory stream to the file
                    using (var fileStream = new FileStream(_fileName, FileMode.Create, FileAccess.Write))
                    {
                        // copy with possible cancellation
                        memoryStream.Seek(0, SeekOrigin.Begin);
                        await memoryStream.CopyToAsync(fileStream, Environment.SystemPageSize, thisSaveTaskCts.Token);
                    }
                }
            }, thisSaveTaskCts.Token);
        }

        // main process: loops until the token is cancelled or a save fails
        async Task ProcessAsync()
        {
            while (true)
            {
                // handle cancellation
                if (_token.IsCancellationRequested)
                {
                    // await the pending save if any, before throwing
                    if (_saveTask != null)
                        await _saveTask.WaitObservingCancellationAsync();
                    _token.ThrowIfCancellationRequested();
                }

                // handle last save errors if any
                if (_saveTask != null && _saveTask.IsFaulted)
                    await _saveTask.WaitObservingCancellationAsync();

                // start getting the next item
                var getNextItemTask = GetNextItemAsync();

                // save the snapshot if needed (runs concurrently with the fetch above)
                if (_items.Count >= _savedItemsCount + SAVE_AFTER)
                {
                    var snapshot = _items.ToArray();
                    _savedItemsCount = snapshot.Length;
                    _saveTask = SaveItemsAsync(snapshot);
                }

                // await the next item
                var item = await getNextItemTask;
                _items.Add(item);
            }
        }

        // start: remembers the token, picks a temp file and launches the processing loop
        public void Start(CancellationToken token)
        {
            _token = token;
            _fileName = System.IO.Path.GetTempFileName();
            _items = new List<Item>();

            _processTask = Task.Run(new Func<Task>(ProcessAsync), _token);
        }

        // stop: blocks until the processing loop has finished. The caller is expected to
        // cancel the token passed to Start first; otherwise this call does not return.
        public void Stop()
        {
            if (_processTask == null)
                return; // Start was never called, nothing to wait for

            _processTask.WaitObservingCancellation();

            // On cancellation ProcessAsync awaits the pending save before it ends, so the
            // last linked source is no longer in use and can be released as well.
            if (_saveTaskCts != null)
            {
                _saveTaskCts.Dispose();
                _saveTaskCts = null;
            }
        }
    }

    // Main
    // Console driver: runs the worker for ten seconds, then cancels it and waits for it to stop.
    class Program
    {
        public static void Main()
        {
            // CancellationTokenSource is IDisposable; release it once the worker has stopped.
            using (var cts = new CancellationTokenSource())
            {
                var worker = new Worker();

                Console.WriteLine("Start process");
                worker.Start(cts.Token);

                Thread.Sleep(10000);

                Console.WriteLine("Stop process");
                cts.Cancel();
                worker.Stop();
            }

            Console.WriteLine("Press Enter to exit...");
            Console.ReadLine();
        }
    }

    // Useful extensions
    /// <summary>
    /// Helpers for waiting on tasks while treating cancellation as a normal outcome.
    /// </summary>
    public static class Extras
    {
        /// <summary>
        /// Returns true when <paramref name="ex"/> represents nothing but cancellation:
        /// either an OperationCanceledException itself, or an AggregateException whose
        /// flattened inner exceptions are all OperationCanceledException (at least one).
        /// </summary>
        public static bool IsOperationCanceledException(this Exception ex)
        {
            if (ex is OperationCanceledException)
                return true;

            var aggEx = ex as AggregateException;
            if (aggEx == null)
                return false;

            // Inspect every inner exception, not just the first one: an aggregate that
            // mixes a cancellation with a real failure must not be treated as a plain
            // cancellation, and nested aggregates of cancellations should be recognised.
            var innerExceptions = aggEx.Flatten().InnerExceptions;
            return innerExceptions.Count > 0
                && innerExceptions.All(inner => inner is OperationCanceledException);
        }

        /// <summary>
        /// Awaits the task; cancellation is swallowed, any other exception is rethrown.
        /// </summary>
        public static async Task WaitObservingCancellationAsync(this Task task)
        {
            try
            {
                await task; // await the task to complete
            }
            catch (Exception ex)
            {
                // rethrow if anything but OperationCanceledException
                if (!ex.IsOperationCanceledException())
                    throw;
            }
        }

        /// <summary>
        /// Blocks until the task completes; cancellation is swallowed, any other exception
        /// is rethrown.
        /// </summary>
        public static void WaitObservingCancellation(this Task task)
        {
            try
            {
                task.Wait(); // wait for the task to complete
            }
            catch (Exception ex)
            {
                // rethrow if anything but OperationCanceledException
                if (!ex.IsOperationCanceledException())
                    throw;
            }
        }
    }
}

答案 1 :(得分:1)

当列表中未保存的项目数超过阈值时,启动一个写入 Task。您可以把这段逻辑放在 Insert 中现有的锁内。这样,写入线程/Task 只有在有工作要做时才存在。

我最近实现了一个类似的事情,我使用Timer以某个时间表启动持久性工作。

答案 2 :(得分:0)

这是我最后写的内容:

// Current immutable snapshot of all items. Insert replaces it while holding _syncObj;
// SavingThread reads it to decide what to write and whether anything changed since.
public ImmutableList<T> Items { get; private set; }

// Appends the given items, re-sorts the list using _sortOrder, publishes the result
// through Items and then makes sure a saving task is running. All under _syncObj.
public void Insert(IEnumerable<T> items)
{
    lock (_syncObj)
    {
        var appended = Items.AddRange(items);
        Items = appended.Sort(_sortOrder);
        StartSaving();
    }
}

// The saving task currently running, or null when none is active.
// Assigned in StartSaving and cleared in SavingThread, both while holding _syncObj.
Task _activeSavingTask;

// Body of the saving task: keeps saving the current snapshot until a save finishes with
// no newer snapshot published in the meantime, then clears _activeSavingTask and exits so
// the next Insert can start a fresh task.
void SavingThread()
{
    try
    {
        for (;;)
        {
            var snapshot = Items;
            SaveItems(snapshot);

            lock (_syncObj)
            {
                // Same list instance: nothing was inserted while the save was running.
                if (snapshot == Items)
                {
                    _activeSavingTask = null;
                    return;
                }
            }
        }
    }
    catch
    {
        // If SaveItems throws, the task ends without reaching the normal exit above.
        // Clear the marker anyway; otherwise StartSaving would see a non-null task
        // forever and no further save would ever be started.
        lock (_syncObj)
        {
            _activeSavingTask = null;
        }
        throw;
    }
}

// Starts a long-running saving task unless one is already active.
void StartSaving()
{
    lock (_syncObj)
    {
        if (_activeSavingTask != null)
            return;

        _activeSavingTask = Task.Factory.StartNew(SavingThread, TaskCreationOptions.LongRunning);
    }
}

答案 3 :(得分:0)

/// <summary>
/// Runs an asynchronous save delegate for enqueued items, one save at a time. While a save
/// is in progress only the most recently enqueued item is remembered; it is saved as soon
/// as the current save completes, and any items enqueued in between are skipped.
/// </summary>
class AsyncSaver<T> where T : class
{
    private readonly object _lock = new object();
    private readonly Func<T, Task> _save;

    // latest item enqueued while a save was running; null when nothing is pending
    private T _item;

    // true while the save loop is active
    private bool _running;

    public AsyncSaver(Func<T, Task> save)
    {
        _save = save;
    }

    /// <summary>
    /// Saves <paramref name="item"/> now if no save is running; otherwise records it as the
    /// pending item, replacing any previously pending one.
    /// </summary>
    public void Enqueue(T item)
    {
        lock (_lock)
        {
            if (_running)
            {
                _item = item;
                return;
            }

            _running = true;
        }

        // Invoke the user delegate outside the lock so that a slow or re-entrant delegate
        // cannot block other Enqueue callers.
        Save(item);
    }

    // Save loop: saves the given item, then keeps saving whatever was enqueued in the
    // meantime until nothing is pending. A loop is used instead of recursion so that
    // synchronously completing saves do not grow the stack.
    private async void Save(T item)
    {
        for (;;)
        {
            try
            {
                await _save(item);
            }
            catch
            {
                // Leave the saver usable after a failed save. The pending item is dropped
                // so that a stale item can never be written after a newer one.
                lock (_lock)
                {
                    _item = null;
                    _running = false;
                }
                throw;
            }

            lock (_lock)
            {
                if (_item == null)
                {
                    _running = false;
                    return;
                }

                item = _item;
                _item = null;
            }
        }
    }
}