我有一个线程,它以小块的形式读取数据,并将其放入 ImmutableList<T>。我需要将此列表转储到文件中。最初我在每次更新时都转储整个列表,但随着时间推移列表不断增长,文件大小现已接近 200 MB,同步写入文件耗时过长,已经跟不上更新的速度。
我已将文件写入改为异步,目前的代码如下:
// Current immutable snapshot of the items. Only this class can assign it
// (private setter); Insert replaces it with a new list on every update.
public ImmutableList<T> Items { get; private set; }
// Appends the given items to the current list, re-sorts the result with
// _sortOrder, publishes it through Items and then signals the saver.
// Everything happens while holding _syncObj.
public void Insert(IEnumerable<T> items)
{
    lock (_syncObj)
    {
        var appended = Items.AddRange(items);
        Items = appended.Sort(_sortOrder);

        QueueSaving();
    }
}
// Endless save loop: writes the current snapshot with SaveItems, then, if
// Items still refers to that same snapshot, blocks on _syncObj until it is
// pulsed. If Items changed during the save, it loops immediately.
void SavingThread()
{
    while (true)
    {
        var current = Items;
        SaveItems(current);

        lock (_syncObj)
        {
            // The comparison is made under the lock so that an update cannot
            // slip in between the check and the wait.
            var changedWhileSaving = current != Items;
            if (!changedWhileSaving)
            {
                Monitor.Wait(_syncObj);
            }
        }
    }
}
// Signals one thread waiting on _syncObj (the saving loop, when it is idle).
// Monitor.Pulse requires the monitor to be held, hence the lock.
void QueueSaving()
{
    lock (_syncObj)
    {
        Monitor.Pulse(_syncObj);
    }
}
也就是说,如果自上次写入以来有多次更新,写入器只保存最新的版本。显然,这样我就必须有一个专门用于保存的线程,它在没有更新时处于休眠状态。
要避免使用专用的写入线程,最简洁的写法是什么?也就是说,由 QueueSaving 启动文件写入器,写入器一直运行到追上列表的最新更改后结束,等到下一次更改时再重新启动?
答案 0 :(得分:3)
根据我从评论中的理解,您只是希望把这段逻辑转换为异步代码。下面演示了如何在不显式创建单独线程的情况下做到这一点(只是用 Task.Run 来启动整个处理流程)。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
namespace ConsoleApplication
{
// Demo worker: repeatedly obtains items asynchronously and, every SAVE_AFTER
// new items, writes a snapshot of all items to a temporary file. Saves are
// chained so that only one write to the file is in progress at a time.
public class Worker
{
    // A single unit of received data.
    class Item
    {
        public string Data { get; set; }
    }

    // Number of new items that must accumulate before another snapshot is saved.
    const int SAVE_AFTER = 2;

    string _fileName;
    List<Item> _items;
    int _savedItemsCount = 0;
    CancellationToken _token;
    Task _processTask;
    Task _pendingSaveTask = null;

    // get next item
    async Task<Item> GetNextItemAsync()
    {
        await Task.Delay(500); // delay for testing
        return new Item { Data = "Item from " + DateTime.Now.ToString() };
    }

    // write
    // Overwrites the file with one line per item. The previous save (if any)
    // is awaited first, so writes never overlap; if that save failed, its
    // exception propagates through this task as well.
    async Task SaveItemsAsync(Item[] items)
    {
        // Read the field synchronously, before the caller overwrites it with
        // the task returned by this very call.
        var previousSave = _pendingSaveTask;
        if (previousSave != null)
            await previousSave; // await the previous save

        // StringBuilder keeps this linear; repeated string concatenation is
        // quadratic in the total text length.
        var text = new System.Text.StringBuilder();
        foreach (var item in items)
            text.Append(item.Data).Append(Environment.NewLine);

        using (var writer = new System.IO.StreamWriter(_fileName, append: false))
        {
            await writer.WriteAsync(text.ToString());
        }
    }

    // main process
    // Loops until the token is cancelled (signalled by throwing
    // OperationCanceledException at the top of an iteration).
    async Task ProcessAsync()
    {
        while (true)
        {
            _token.ThrowIfCancellationRequested();

            // start getting the next item
            var getNextItemTask = GetNextItemAsync();

            // save the snapshot if needed
            if (_items.Count >= _savedItemsCount + SAVE_AFTER)
            {
                var snapshot = _items.ToArray();
                _savedItemsCount = snapshot.Length;
                _pendingSaveTask = SaveItemsAsync(snapshot);
            }

            // await the next item
            var item = await getNextItemTask;
            _items.Add(item);
        }
    }

    // start
    // Creates the temporary target file and launches the processing loop on
    // the thread pool. The token is used to stop the loop.
    public void Start(CancellationToken token)
    {
        _token = token;
        _fileName = System.IO.Path.GetTempFileName();
        _items = new List<Item>();
        _processTask = Task.Run(new Func<Task>(ProcessAsync), _token);
    }

    // stop
    // Blocks until the processing loop has ended and the last save has
    // finished. Cancellation of the loop is swallowed; any other failure
    // (including a failed save) is rethrown. Does nothing if Start was never
    // called.
    public void Stop()
    {
        var processTask = _processTask;
        if (processTask != null)
        {
            try
            {
                // Wait for the loop first: only once it has ended is it
                // certain that no further save will be started.
                processTask.Wait(); // wait for the task to complete
            }
            catch (Exception ex)
            {
                // rethrow if anything but OperationCanceledException
                if (!(ex is OperationCanceledException))
                {
                    var aggEx = ex as AggregateException;
                    if (aggEx == null || !(aggEx.InnerException is OperationCanceledException))
                        throw;
                }
            }
        }

        var pendingSave = _pendingSaveTask;
        if (pendingSave != null)
            pendingSave.Wait();
    }
}
// Console driver for the demo: runs a Worker for ten seconds, then cancels
// it and waits for it to shut down.
class Program
{
    public static void Main()
    {
        var cancellation = new CancellationTokenSource();
        var demoWorker = new Worker();

        Console.WriteLine("Start process");
        demoWorker.Start(cancellation.Token);

        // Let the worker run for a while before stopping it.
        Thread.Sleep(TimeSpan.FromSeconds(10));

        Console.WriteLine("Stop process");
        cancellation.Cancel();
        demoWorker.Stop();

        Console.WriteLine("Press Enter to exit...");
        Console.ReadLine();
    }
}
}
请注意,如果新项目(GetNextItemAsync)到达的速度快于 SaveItemsAsync 保存上一个快照的速度,此实现可能会导致待处理的 SaveItemsAsync 调用越积越多。如果这是个问题,您可以将 SaveItemsAsync 任务限制为只有一个待处理实例,并使用 BlockingCollection 对新快照进行排队来解决。
[更新] 这是一个稍作改进的版本:当更新速度快于保存速度时,它可以消除冗余的写入。它没有使用 BlockingCollection,而是在 SaveItemsAsync 中加入了一些额外的取消逻辑。这是一个控制台应用程序,您可以直接运行,看看实际效果。可以试着连续多次调用 _saveTask = SaveItemsAsync(snapshot)。
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
namespace ConsoleApplication
{
// Demo worker: repeatedly obtains items asynchronously and, every SAVE_AFTER
// new items, saves a snapshot of all items to a temporary file. Starting a
// new save cancels the previous one if it is still pending, so redundant
// writes are dropped when updates arrive faster than they can be saved.
public class Worker
{
    // A single unit of received data.
    class Item
    {
        public string Data { get; set; }
    }

    // Number of new items that must accumulate before another snapshot is saved.
    const int SAVE_AFTER = 2;

    string _fileName;
    List<Item> _items;
    int _savedItemsCount = 0;
    CancellationToken _token;
    Task _processTask;
    Task _saveTask;
    CancellationTokenSource _saveTaskCts;

    // get next item
    async Task<Item> GetNextItemAsync()
    {
        Console.WriteLine("Enter GetNextItemAsync, thread: " + System.Threading.Thread.CurrentThread.ManagedThreadId);
        await Task.Delay(500); // delay for testing
        return new Item { Data = "Item from " + DateTime.Now.ToString() };
    }

    // save items
    // Cancels and awaits the previous save, then writes the given snapshot.
    // The returned task ends as cancelled if a later save or the outer token
    // cancels it, and as faulted if the previous save or this write failed.
    async Task SaveItemsAsync(Item[] items)
    {
        // avoid multiple pending SaveItemsAsync tasks
        Console.WriteLine("Enter SaveItemsAsync, thread: " + System.Threading.Thread.CurrentThread.ManagedThreadId);

        var oldSaveTaskCts = _saveTaskCts;
        var oldSaveTask = _saveTask;

        var thisSaveTaskCts = CancellationTokenSource.CreateLinkedTokenSource(_token);
        // Capture the token once; it is the only thing this call needs from
        // the source from here on.
        var saveToken = thisSaveTaskCts.Token;

        _saveTaskCts = thisSaveTaskCts;
        _saveTask = null;

        // cancel the previous pending SaveItemsAsync, if any
        if (oldSaveTaskCts != null)
        {
            try
            {
                oldSaveTaskCts.Cancel();
                if (oldSaveTask != null)
                    await oldSaveTask.WaitObservingCancellationAsync();
            }
            finally
            {
                // The previous save has finished, so nothing uses its source
                // any more. A linked source stays registered with the outer
                // token until disposed; without this every save would leak
                // one registration.
                oldSaveTaskCts.Dispose();
            }
        }

        // another SaveItemsAsync call should lead to cancelling this one
        saveToken.ThrowIfCancellationRequested();

        // execute the save logic on a pool thread,
        // Task.Run automatically unwraps the nested Task<Task>
        await Task.Run(async () =>
        {
            // do the CPU-bound work: create textual representation of data.
            // StringBuilder keeps this linear; repeated string concatenation
            // is quadratic in the total text length.
            var builder = new System.Text.StringBuilder();
            foreach (var item in items)
                builder.Append(item.Data).Append(Environment.NewLine);
            var text = builder.ToString();

            // write asynchronously
            Console.WriteLine("Write, thread: " + System.Threading.Thread.CurrentThread.ManagedThreadId);

            // StreamWriter doesn't support cancellation, so do it in two stages with MemoryStream
            using (var memoryStream = new MemoryStream())
            {
                // write to a memory stream first
                using (var writer = new StreamWriter(
                    memoryStream,
                    encoding: System.Text.Encoding.UTF8,
                    bufferSize: Environment.SystemPageSize,
                    leaveOpen: true))
                {
                    await writer.WriteAsync(text);
                }

                saveToken.ThrowIfCancellationRequested();

                // copy the memory stream to the file
                using (var fileStream = new FileStream(_fileName, FileMode.Create, FileAccess.Write))
                {
                    // copy with possible cancellation
                    memoryStream.Seek(0, SeekOrigin.Begin);
                    await memoryStream.CopyToAsync(fileStream, Environment.SystemPageSize, saveToken);
                }
            }
        }, saveToken);
    }

    // main process
    // Loops until the token is cancelled or a save fails.
    async Task ProcessAsync()
    {
        while (true)
        {
            // handle cancellation
            if (_token.IsCancellationRequested)
            {
                // await the pending save if any, before throwing
                if (_saveTask != null)
                    await _saveTask.WaitObservingCancellationAsync();
                _token.ThrowIfCancellationRequested();
            }

            // handle last save errors if any
            if (_saveTask != null && _saveTask.IsFaulted)
                await _saveTask.WaitObservingCancellationAsync();

            // start getting the next item
            var getNextItemTask = GetNextItemAsync();

            // save the snapshot if needed
            if (_items.Count >= _savedItemsCount + SAVE_AFTER)
            {
                var snapshot = _items.ToArray();
                _savedItemsCount = snapshot.Length;
                _saveTask = SaveItemsAsync(snapshot);
            }

            // await the next item
            var item = await getNextItemTask;
            _items.Add(item);
        }
    }

    // start
    // Creates the temporary target file and launches the processing loop on
    // the thread pool. The token is used to stop the loop.
    public void Start(CancellationToken token)
    {
        _token = token;
        _fileName = System.IO.Path.GetTempFileName();
        _items = new List<Item>();
        _processTask = Task.Run(new Func<Task>(ProcessAsync), _token);
    }

    // stop
    // Blocks until the processing loop has ended. Cancellation is swallowed,
    // any other failure is rethrown. Does nothing if Start was never called.
    public void Stop()
    {
        var processTask = _processTask;
        if (processTask != null)
            processTask.WaitObservingCancellation();
    }
}
// Console entry point for the demo: runs a Worker for ten seconds, then
// cancels it and waits for it to shut down.
class Program
{
    public static void Main()
    {
        var cancellation = new CancellationTokenSource();
        var demoWorker = new Worker();

        Console.WriteLine("Start process");
        demoWorker.Start(cancellation.Token);

        // Give the worker some time to collect and save items.
        Thread.Sleep(TimeSpan.FromSeconds(10));

        Console.WriteLine("Stop process");
        cancellation.Cancel();
        demoWorker.Stop();

        Console.WriteLine("Press Enter to exit...");
        Console.ReadLine();
    }
}
// Extension helpers for waiting on tasks while treating cancellation as a
// normal outcome rather than an error.
public static class Extras
{
    // Returns true when the exception represents cancellation only: either
    // an OperationCanceledException (or a subclass), or an AggregateException
    // whose inner exceptions, after flattening nested aggregates, are all
    // OperationCanceledException. An aggregate that also carries a real
    // failure is NOT treated as cancellation, so the failure is not
    // swallowed by the callers. Returns false for null and for an empty
    // aggregate.
    public static bool IsOperationCanceledException(this Exception ex)
    {
        if (ex is OperationCanceledException)
            return true;

        var aggEx = ex as AggregateException;
        if (aggEx == null)
            return false;

        var innerExceptions = aggEx.Flatten().InnerExceptions;
        if (innerExceptions.Count == 0)
            return false;

        foreach (var inner in innerExceptions)
        {
            if (!(inner is OperationCanceledException))
                return false;
        }
        return true;
    }

    // Awaits the task; cancellation is swallowed, any other exception is
    // rethrown.
    public static async Task WaitObservingCancellationAsync(this Task task)
    {
        try
        {
            await task; // await the task to complete
        }
        catch (Exception ex)
        {
            // rethrow if anything but OperationCanceledException
            if (!ex.IsOperationCanceledException())
                throw;
        }
    }

    // a helper to wait for the task to complete and observe exceptions
    // Blocks the calling thread; cancellation is swallowed, any other
    // exception is rethrown (as the AggregateException produced by Wait).
    public static void WaitObservingCancellation(this Task task)
    {
        try
        {
            task.Wait(); // wait for the task to complete
        }
        catch (Exception ex)
        {
            // rethrow if anything but OperationCanceledException
            if (!ex.IsOperationCanceledException())
                throw;
        }
    }
}
}
答案 1 :(得分:1)
当列表中未保存的项目数超过某个阈值时,启动一个负责写入的 Task。您可以把这段逻辑放进 Insert 中,置于现有的锁之下。这样,写入线程/Task 只在有工作要做时才存在。
我最近实现过类似的功能,当时是使用 Timer 按一定的时间间隔启动持久化工作。
答案 2 :(得分:0)
这是我最后写的内容:
// Current immutable snapshot of the items. Only this class can assign it
// (private setter); Insert replaces it with a new list on every update.
public ImmutableList<T> Items { get; private set; }
// Appends the given items to the current list, re-sorts the result with
// _sortOrder, publishes it through Items and makes sure a save is running.
// Everything happens while holding _syncObj.
public void Insert(IEnumerable<T> items)
{
    lock (_syncObj)
    {
        var appended = Items.AddRange(items);
        Items = appended.Sort(_sortOrder);

        StartSaving();
    }
}
// The task currently running SavingThread, or null when no save is in
// progress. Assigned in StartSaving and cleared in SavingThread, in both
// places while holding _syncObj.
Task _activeSavingTask;
// Body of the saving task: saves the current snapshot and repeats for as
// long as Items changed during the save. When the saved snapshot is still
// the current one, it clears _activeSavingTask and returns, so that the next
// StartSaving call launches a fresh task.
void SavingThread()
{
    try
    {
        for (;;)
        {
            var snapshot = Items;
            SaveItems(snapshot);
            lock (_syncObj)
            {
                // Checked under the lock so that an update cannot slip in
                // between the comparison and clearing the task field.
                if (snapshot == Items)
                {
                    _activeSavingTask = null;
                    return;
                }
            }
        }
    }
    catch
    {
        // If saving throws, the task ends as faulted. Clear the field anyway;
        // otherwise StartSaving would keep seeing a non-null task and no save
        // would ever be started again.
        lock (_syncObj)
        {
            _activeSavingTask = null;
        }
        throw;
    }
}
// Launches SavingThread on a new long-running task unless one is already
// recorded in _activeSavingTask. The check and the assignment are done
// under _syncObj.
void StartSaving()
{
    lock (_syncObj)
    {
        if (_activeSavingTask != null)
        {
            return;
        }

        _activeSavingTask = Task.Factory.StartNew(SavingThread, TaskCreationOptions.LongRunning);
    }
}
答案 3 :(得分:0)
// Serializes asynchronous saves and coalesces requests: at most one save
// runs at a time, and while it runs only the most recently enqueued item is
// remembered. When the running save finishes, that remembered item (if any)
// is saved next; older pending items are dropped.
class AsyncSaver<T> where T : class
{
    private readonly object _lock = new object();
    private readonly Func<T, Task> _save;
    // Latest item enqueued while a save was running; null means "nothing pending".
    private T _item;
    // True while a save chain is active.
    private bool _running;

    // save: the asynchronous operation that persists one item.
    // Throws ArgumentNullException if save is null (fail fast instead of
    // failing on the first save).
    public AsyncSaver(Func<T, Task> save)
    {
        if (save == null)
            throw new ArgumentNullException("save");
        _save = save;
    }

    // Requests that the item be saved. Starts a save immediately if none is
    // running; otherwise the item replaces any previously pending item.
    // Note: since null marks "nothing pending", enqueuing null while a save
    // is running clears the pending item.
    public void Enqueue(T item)
    {
        lock (_lock)
        {
            if (_running)
            {
                _item = item;
                return;
            }
            _running = true;
        }

        // Started outside the lock so the user-supplied delegate never runs
        // while _lock is held.
        Save(item);
    }

    // Runs one save and then continues with the pending item, if any.
    // async void is deliberate (fire-and-forget from Enqueue): an exception
    // thrown by the save delegate is not observable by the caller of Enqueue
    // and is raised on the captured synchronization context or thread pool.
    private async void Save(T item)
    {
        try
        {
            await _save(item);
        }
        finally
        {
            // Runs after success AND failure, so a failing save can neither
            // leave _running stuck at true nor strand the pending item.
            T next;
            lock (_lock)
            {
                next = _item;
                _item = null;
                if (next == null)
                    _running = false;
            }

            if (next != null)
                Save(next);
        }
    }
}