c#多线程处理批量为100

时间:2016-11-25 22:49:25

标签: c# multithreading asynchronous async-await batch-processing

我有一个500.000.000行的文件。

这些行是最多10个字符的字符串。

如何使用多线程和批量100来处理此文件?

2 个答案:

答案 0 :(得分:2)

使用MoreLinq的Batch方法,这将创建一个IEnumerable<string>的集合,其中每个批次包含100行,并为每100行启动一个新任务。

这是一个基本的实现。使用Semaphore来限制在任何给定时间只运行一定数量的任务可能是明智的,同时也应当考察File.ReadAllLines在处理500,000,000行时的性能开销。

File.ReadAllLines

答案 1 :(得分:1)

如果您使用内置TPL中的Parallel.ForEach并编写几个枚举器(如下所列),则不需要使用其他库。您的代码可能如下所示:

// Read the file through a StreamReader, group its lines into batches of 100,
// and hand each batch to DoMyProcessing via Parallel.ForEach.
using (var input = new StreamReader(File.OpenRead(@"c:\path\to\my\file.txt")))
{
    var batches = input.ReadLines().TakeChunks(100);

    var options = new ParallelOptions();
    options.MaxDegreeOfParallelism = 8; // better be number of CPU cores

    Parallel.ForEach(batches, options, chunk =>
    {
        DoMyProcessing(chunk);
    });
}

要使其工作,您需要IEnumerable<T>上的几个扩展方法和几个枚举器,定义如下:

/// <summary>
/// Extension methods, plus the enumerable/enumerator types backing them, for
/// reading a <see cref="StreamReader"/> line by line and for grouping any
/// sequence into fixed-size chunks.
/// </summary>
public static class EnumerableExtensions
{
    /// <summary>
    /// Exposes the lines of <paramref name="input"/> as a sequence of strings.
    /// </summary>
    /// <param name="input">The reader to pull lines from.</param>
    /// <returns>A sequence that reads one line from the reader per element.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <remarks>
    /// Disposing an enumerator obtained from the returned sequence disposes
    /// <paramref name="input"/> itself.
    /// </remarks>
    public static IEnumerable<string> ReadLines(this StreamReader input)
    {
        if (input == null)
        {
            throw new ArgumentNullException(nameof(input));
        }

        return new LineReadingEnumerable(input);
    }

    /// <summary>
    /// Groups <paramref name="source"/> into consecutive chunks of at most
    /// <paramref name="length"/> elements. Only the final chunk can be shorter.
    /// </summary>
    /// <typeparam name="T">Element type of the source sequence.</typeparam>
    /// <param name="source">The sequence to split.</param>
    /// <param name="length">Maximum number of elements per chunk; must be positive.</param>
    /// <returns>A sequence of non-empty chunks.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is zero or negative.</exception>
    public static IEnumerable<IReadOnlyList<T>> TakeChunks<T>(this IEnumerable<T> source, int length)
    {
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        // A non-positive length would make the chunking loop a no-op, so the
        // result would be an empty sequence and every input element would be
        // silently dropped. Fail loudly instead.
        if (length <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(length), length, "Chunk length must be greater than zero.");
        }

        return new ChunkingEnumerable<T>(source, length);
    }

    /// <summary>
    /// Enumerable wrapper over a <see cref="StreamReader"/>. Every enumerator it
    /// hands out is bound to the same reader instance.
    /// </summary>
    public class LineReadingEnumerable : IEnumerable<string>
    {
        private readonly StreamReader _input;

        /// <summary>Creates the wrapper around <paramref name="input"/>.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        public LineReadingEnumerable(StreamReader input)
        {
            if (input == null)
            {
                throw new ArgumentNullException(nameof(input));
            }

            _input = input;
        }

        /// <summary>Returns a new line-reading enumerator over the wrapped reader.</summary>
        public IEnumerator<string> GetEnumerator()
        {
            return new LineReadingEnumerator(_input);
        }

        IEnumerator IEnumerable.GetEnumerator()
        {
            return GetEnumerator();
        }
    }

    /// <summary>
    /// Enumerator that yields one line per <see cref="MoveNext"/> call by calling
    /// <see cref="StreamReader.ReadLine"/> on the wrapped reader.
    /// </summary>
    public class LineReadingEnumerator : IEnumerator<string>
    {
        private readonly StreamReader _input;
        private string _current;

        /// <summary>Creates the enumerator over <paramref name="input"/>.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        public LineReadingEnumerator(StreamReader input)
        {
            if (input == null)
            {
                throw new ArgumentNullException(nameof(input));
            }

            _input = input;
        }

        /// <summary>Disposes the wrapped reader.</summary>
        public void Dispose()
        {
            _input.Dispose();
        }

        /// <summary>
        /// Reads the next line into <see cref="Current"/>.
        /// </summary>
        /// <returns>False once <see cref="StreamReader.ReadLine"/> returns null.</returns>
        public bool MoveNext()
        {
            _current = _input.ReadLine();
            return (_current != null);
        }

        /// <summary>Not supported.</summary>
        /// <exception cref="NotSupportedException">Always thrown.</exception>
        public void Reset()
        {
            throw new NotSupportedException();
        }

        /// <summary>The line read by the most recent <see cref="MoveNext"/> call.</summary>
        public string Current
        {
            get { return _current; }
        }

        object IEnumerator.Current
        {
            get { return _current; }
        }
    }

    /// <summary>
    /// Enumerable that presents an inner sequence as chunks of a fixed maximum length.
    /// </summary>
    /// <typeparam name="T">Element type of the inner sequence.</typeparam>
    public class ChunkingEnumerable<T> : IEnumerable<IReadOnlyList<T>>
    {
        private readonly IEnumerable<T> _inner;
        private readonly int _length;

        /// <summary>Creates the chunking view over <paramref name="inner"/>.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="inner"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is zero or negative.</exception>
        public ChunkingEnumerable(IEnumerable<T> inner, int length)
        {
            if (inner == null)
            {
                throw new ArgumentNullException(nameof(inner));
            }

            if (length <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(length), length, "Chunk length must be greater than zero.");
            }

            _inner = inner;
            _length = length;
        }

        /// <summary>Starts a fresh enumeration of the inner sequence, wrapped in a chunking enumerator.</summary>
        public IEnumerator<IReadOnlyList<T>> GetEnumerator()
        {
            return new ChunkingEnumerator<T>(_inner.GetEnumerator(), _length);
        }

        IEnumerator IEnumerable.GetEnumerator()
        {
            return this.GetEnumerator();
        }
    }

    /// <summary>
    /// Enumerator that drains an inner enumerator into lists of at most a fixed
    /// number of elements, one list per <see cref="MoveNext"/> call.
    /// </summary>
    /// <typeparam name="T">Element type of the inner enumerator.</typeparam>
    public class ChunkingEnumerator<T> : IEnumerator<IReadOnlyList<T>>
    {
        private readonly IEnumerator<T> _inner;
        private readonly int _length;
        private IReadOnlyList<T> _current;
        private bool _endOfInner;

        /// <summary>Creates the enumerator over <paramref name="inner"/>.</summary>
        /// <exception cref="ArgumentNullException"><paramref name="inner"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is zero or negative.</exception>
        public ChunkingEnumerator(IEnumerator<T> inner, int length)
        {
            if (inner == null)
            {
                throw new ArgumentNullException(nameof(inner));
            }

            if (length <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(length), length, "Chunk length must be greater than zero.");
            }

            _inner = inner;
            _length = length;
        }

        /// <summary>Disposes the inner enumerator and clears the current chunk.</summary>
        public void Dispose()
        {
            _inner.Dispose();
            _current = null;
        }

        /// <summary>
        /// Collects up to the configured number of elements from the inner
        /// enumerator into a new list and makes it the current chunk.
        /// </summary>
        /// <returns>True if at least one element was collected; otherwise false.</returns>
        public bool MoveNext()
        {
            var currentBuffer = new List<T>();

            // Once the inner enumerator has reported its end, it is not asked again.
            while (currentBuffer.Count < _length && !_endOfInner)
            {
                if (!_inner.MoveNext())
                {
                    _endOfInner = true;
                    break;
                }

                currentBuffer.Add(_inner.Current);
            }

            // A partially filled buffer is still a valid (final) chunk.
            if (currentBuffer.Count > 0)
            {
                _current = currentBuffer;
                return true;
            }

            _current = null;
            return false;
        }

        /// <summary>Resets the inner enumerator and this enumerator's own state.</summary>
        public void Reset()
        {
            _inner.Reset();
            _current = null;
            _endOfInner = false;
        }

        /// <summary>The chunk produced by the most recent successful <see cref="MoveNext"/> call.</summary>
        /// <exception cref="InvalidOperationException">No chunk is currently available.</exception>
        public IReadOnlyList<T> Current
        {
            get
            {
                if (_current != null)
                {
                    return _current;
                }

                throw new InvalidOperationException();
            }
        }

        object IEnumerator.Current
        {
            get
            {
                return this.Current;
            }
        }
    }
}