使用Rx C#在大型数据集上创建并行过滤

时间:2016-05-24 06:28:13

标签: c# task-parallel-library system.reactive reactive-programming tpl-dataflow

我正在用 C# 和 Rx 编写一个搜索功能。我有一个包含 5 万条记录的数据集,需要在用户开始输入时根据输入的关键字执行搜索。我为此编写了下面的代码,但我认为其中存在不少与并发和并行相关的问题。请帮忙查看代码,并告诉我如何针对多核系统进行优化。

代码:

 /// <summary>
 /// View model for the search page: watches <see cref="SearchString"/>, filters the
 /// in-memory data set in chunks on the thread pool and publishes the matches to
 /// <see cref="LstItems"/> on the UI thread.
 /// </summary>
 public class MainPageViewModel : Bindablebase, IDisposable
    {
        /// <summary>Number of sample records generated by <see cref="PopulateSampleDatas"/>.</summary>
        private const int SampleSize = 50000;

        /// <summary>Number of records handed to a single filter task.</summary>
        private const int ChunkSize = 100;

        /// <summary>Quiet period the search text must be stable for before a search starts.</summary>
        private static readonly TimeSpan TypingPause = TimeSpan.FromMilliseconds(400);

        private string searchString;

        /// <summary>Text currently typed by the user; changes drive the search pipeline.</summary>
        public string SearchString
        {
            get { return searchString; }
            set { SetProperty(ref searchString, value); }
        }

        private string result = "no result";

        /// <summary>Status text; initialised to "no result".</summary>
        public string Result
        {
            get { return result; }
            set { SetProperty(ref result, value); }
        }

        private ObservableCollection<string> lstItems = new ObservableCollection<string>();

        /// <summary>Items shown for the current search; only touched on the UI thread.</summary>
        public ObservableCollection<string> LstItems
        {
            get { return lstItems; }
            set { SetProperty(ref lstItems, value); }
        }

        CoreDispatcher dispatcher = CoreWindow.GetForCurrentThread().Dispatcher;

        private readonly List<int> dataSet1 = new List<int>();

        // Handle of the live search pipeline; released in Dispose so the pipeline
        // does not keep running (and keep this instance alive) after the page is gone.
        private readonly IDisposable searchSubscription;

        /// <summary>
        /// Fills the sample data set and wires the search pipeline.
        /// </summary>
        public MainPageViewModel()
        {
            PopulateSampleDatas();

            // Stream of search keywords, emitted once the user pauses typing.
            var searchDataStream = this.ToObservable<string>(() => SearchString)
                                       .Throttle(TypingPause);

            var resultStream = searchDataStream
                               // Move to the UI thread and reset the result list for the new keyword.
                               .ObserveOnDispatcher()
                               .Do(str => { LstItems.Clear(); LstItems.Add(SearchString); })
                               // Leave the UI thread before chunking and filtering.
                               .ObserveOn(TaskPoolScheduler.Default)
                               // Build the COMPLETE per-keyword pipeline (chunking + filtering) as one
                               // inner sequence, so that Switch below drops everything belonging to a
                               // superseded keyword. Previously Switch sat before the filter step and
                               // only cancelled the chunk stream, letting stale filter results through.
                               .Select(str => GetFilteredData(str).SelectMany(chunk => FilterData(chunk)))
                               .Switch();

            // NOTE(review): a result already queued to the dispatcher when the next keyword
            // clears the list can still be appended; closing that window would require
            // routing the reset through this same stream.
            searchSubscription = resultStream.ObserveOnDispatcher().Subscribe(v =>
            {
                foreach (var val in v)
                {
                    LstItems.Add(val.ToString());
                }
            });

        }

        /// <summary>
        /// Stops the search pipeline.
        /// </summary>
        public void Dispose()
        {
            searchSubscription.Dispose();
        }

        /// <summary>
        /// Filters one chunk of the data set on a thread-pool thread.
        /// </summary>
        /// <param name="arg">The chunk of records to filter.</param>
        /// <returns>A task that completes with the records returned by the filter for this chunk.</returns>
        private Task<List<int>> FilterData(IList<int> arg)
        {
            // The previous version was marked async but never awaited, so it ran synchronously
            // on the caller's thread and chunks were processed one after another. Task.Run
            // queues each chunk separately so several chunks can be filtered concurrently.
            // NOTE(review): assumes Filtereddata is safe to call from multiple threads - confirm.
            return Task.Run(() => Filtereddata(arg));
        }

        /// <summary>
        /// Splits the data set into chunks of <see cref="ChunkSize"/> records.
        /// </summary>
        /// <param name="arg">The search keyword.</param>
        /// <returns>An observable of chunks, delivered on the task pool.</returns>
        private IObservable<IList<int>> GetFilteredData(string arg)
        {
            // NOTE(review): the keyword is not used here and is not passed on to the filter
            // step, so every keyword currently produces the same chunks - confirm intent.
            return dataSet1.ToObservable().Buffer(ChunkSize)
                .ObserveOn(TaskPoolScheduler.Default);
        }

        /// <summary>
        /// Populates the sample data set with the integers 0 .. <see cref="SampleSize"/> - 1.
        /// </summary>
        private void PopulateSampleDatas()
        {
            for (int i = 0; i < SampleSize; i++)
            {
                dataSet1.Add(i);
            }
        }
    }

0 个答案:

没有答案