如何并行化此C#算法?

时间:2019-03-22 18:50:22

标签: c# parallel-processing task-parallel-library

我是C#初学者,所以我对Task或线程一无所知。我写了这段代码,我想使用某种并行或线程处理。

代码涉及两个DataTable(A和B),我必须将A的每个单元格的值与B的所有单元格进行比较。B只有一列、若干行,而A可能有数百万个单元格。我是用for循环实现的。下面是我想并行化以加快处理速度的那部分代码:

  /// <summary>
  /// Buckets every cell of <paramref name="Prop"/> into the first range of
  /// <paramref name="Rango"/> whose bound (column 0) is greater than the matching
  /// cell of <paramref name="Prof"/>, accumulating the sum and the count per range.
  /// </summary>
  /// <param name="Prof">Values compared against the range bounds; read at the same [row][column] as <paramref name="Prop"/>.</param>
  /// <param name="Prop">Values to accumulate; cells equal to <c>nullvalue</c> are skipped.</param>
  /// <param name="Rango">Range table; column 0 is the bound used for the comparison, column 1 is copied to the output.</param>
  /// <returns>
  /// A table with one row per range that received at least one value. "Sumatoria" holds the
  /// sum and "n" the count; "Promedio" is left at 0 because this method does not compute it.
  /// </returns>
  private DataTable CalculosPrincipales(DataTable Prof, DataTable Prop, DataTable Rango)
        {
            DataTable dt = new DataTable();
            dt.Columns.Add("Prof Evaluar", typeof(double));
            dt.Columns.Add("Profundidad", typeof(double));
            dt.Columns.Add("Promedio", typeof(double));
            dt.Columns.Add("Sumatoria", typeof(double));
            dt.Columns.Add("n", typeof(double));

            // One accumulator row per range. The bucketing loop below indexes dt.Rows[k]
            // for every k < Rango.Rows.Count, so dt must contain ALL ranges; unused ones
            // are removed at the end.
            int rangos = Rango.Rows.Count;
            double[] limites = new double[rangos];
            for (int k = 0; k < rangos; k++)
            {
                dt.Rows.Add(Rango.Rows[k][0], Rango.Rows[k][1], 0, 0, 0);

                // Convert each bound once instead of once per cell and range.
                limites[k] = Convert.ToDouble(Rango.Rows[k][0]);
            }

            int columnas = Prop.Columns.Count;

            for (int i = 0; i < Prop.Rows.Count; i++)
            {
                for (int j = 0; j < columnas; j++)
                {
                    double prop_celda = Convert.ToDouble(Prop.Rows[i][j]);

                    if (prop_celda != nullvalue)
                    {
                        // Does not depend on k, so it is read once per cell.
                        double prof_celda = Convert.ToDouble(Prof.Rows[i][j]);

                        for (int k = 0; k < rangos; k++)
                        {
                            if (prof_celda < limites[k])
                            {
                                dt.Rows[k][3] = Convert.ToDouble(dt.Rows[k][3]) + prop_celda;
                                dt.Rows[k][4] = Convert.ToInt32(dt.Rows[k][4]) + 1;
                                break; // only the first matching range receives the value
                            }
                        }
                    }
                }

                // Progress is reported once per processed row of Prop.
                progressBar.PerformStep();
                Application.DoEvents();
            }

            // Drop ranges that received no value. Walking backwards keeps the index
            // valid while rows are being removed.
            for (int i = dt.Rows.Count - 1; i >= 0; i--)
            {
                if (Convert.ToInt32(dt.Rows[i][4]) == 0)
                {
                    dt.Rows[i].Delete();
                }
            }

            return dt;
        }

如果表A有10000个单元格,这段代码运行得很快;如果有200000个单元格,则需要5分钟;而有1000000个单元格时则需要20分钟。

1 个答案:

答案 0 :(得分:0)

这是算法并行化的示例。但是,使用DataTable会带来一些性能损失。您应该考虑使用更合适的类。

我进行了以下更改:

  • 将计算结果提取到一个单独的类中。
  • 将计算分为n个任务。
  • 添加了通过CancellationTokenSource取消的支持
  • 将主动进度报告替换为被动进度报告。
  • 添加了异常处理

现在一切都在后台运行。您不再需要阻塞或拖慢用户界面,只需启动计算,计算完成时会通过回调通知您。

您可以手动设置线程数,也可以让算法使用CPU内核数,从而最大化性能。

请注意,这不是理想的实现,它只是一个示例,未经测试。

在我看来,您的描述与代码并不完全相符(您提到了2个输入表,但代码使用了3个输入——Prop和Prof难道不是同一个表吗?)

using System;
using System.Data;
using System.Threading;
using System.Threading.Tasks;

/// <summary>
/// Runs the range-bucketing calculation in the background, split into several
/// tasks that each process their own stripe of rows.
/// </summary>
/// <remarks>
/// Set <see cref="Prof"/>, <see cref="Prop"/> and <see cref="Rango"/>, then call
/// <see cref="Run"/>. The input tables are only read, never modified. Every bound in
/// column 0 of <see cref="Rango"/> is converted to <c>double</c> up front, so all of
/// them must be convertible.
/// </remarks>
public class ParallelCalculation
{
    /// <summary>
    /// Receives the outcome of a run. <paramref name="result"/> is null when the run failed
    /// before any partial calculation finished; <paramref name="exception"/> is the first
    /// error encountered, or null.
    /// </summary>
    public delegate void CompletionHandler(DataTable result, Exception exception);

    /// <summary>Input tables; must be assigned before <see cref="Run"/> is called.</summary>
    public DataTable Prof, Prop, Rango;

    /// <summary>One stripe of rows together with its private accumulators.</summary>
    class Part
    {
        public int FromRow, ToRow;   // half-open row interval [FromRow, ToRow)
        public double[] Sums;        // per-range sum of the accepted Prop cells
        public int[] Counts;         // per-range number of accepted Prop cells
        public float Progress;       // 0..1, written by the worker, polled by Progress()
        public Exception Exception;  // failure of this stripe, if any
    }

    Part[] parts;
    CancellationToken cancellation;
    CompletionHandler callback;

    /// <summary>
    /// Starts the calculation on the thread pool and returns immediately.
    /// </summary>
    /// <param name="callback">Invoked exactly once, on a thread-pool thread, when the run ends.</param>
    /// <param name="token">
    /// Cancels the run. A cancelled run still invokes <paramref name="callback"/>, with
    /// whatever partial sums were accumulated and no exception.
    /// </param>
    /// <param name="threadCount">Number of parallel tasks; 0 or less means one per CPU core.</param>
    public void Run(CompletionHandler callback, CancellationToken token, int threadCount = 0)
    {
        this.cancellation = token;
        this.callback = callback;

        // Fire and forget: PerformAsync reports every outcome through the callback.
        Task.Run(() => PerformAsync(threadCount));
    }

    /// <summary>Splits the work, waits for all stripes, merges them and calls back.</summary>
    async Task PerformAsync(int threadCount)
    {
        DataTable table = null;
        Exception error = null;

        try
        {
            // Convert the range bounds once; the workers compare against this array
            // instead of re-reading the DataTable for every cell.
            int rangeCount = Rango.Rows.Count;
            var bounds = new double[rangeCount];
            for (int k = 0; k < rangeCount; k++)
            {
                bounds[k] = Convert.ToDouble(Rango.Rows[k][0]);
            }

            // Split the rows into n stripes; the first (rows % n) stripes get one extra row.
            int n = threadCount > 0 ? threadCount : Environment.ProcessorCount;
            int totalRows = Prof.Rows.Count;
            int rowsPerTask = totalRows / n;
            int rest = totalRows % n;

            var stripes = new Part[n];
            for (int i = 0, from = 0; i < n; ++i)
            {
                int to = from + rowsPerTask + (i < rest ? 1 : 0);
                stripes[i] = new Part
                {
                    FromRow = from,
                    ToRow = to,
                    Sums = new double[rangeCount],
                    Counts = new int[rangeCount]
                };
                from = to;
            }

            // Publish only the fully built array so Progress() never sees null entries.
            parts = stripes;

            var tasks = new Task[n];
            for (int i = 0; i < n; ++i)
            {
                Part part = stripes[i]; // fresh local per iteration for the closure
                tasks[i] = Task.Run(() => CalculatePart(part, bounds));
            }

            // Workers catch their own exceptions, so this completes without throwing.
            await Task.WhenAll(tasks).ConfigureAwait(false);

            // Merge the partial accumulators in stripe order and remember the first failure.
            var sums = new double[rangeCount];
            var counts = new int[rangeCount];
            foreach (var part in stripes)
            {
                error = error ?? part.Exception;
                for (int k = 0; k < rangeCount; ++k)
                {
                    sums[k] += part.Sums[k];
                    counts[k] += part.Counts[k];
                }
            }

            table = BuildResultTable(sums, counts);
        }
        catch (Exception e)
        {
            table = null;
            error = e;
        }

        // Outside the try block so that a throwing callback is not invoked a second time.
        callback?.Invoke(table, error);
    }

    /// <summary>Creates the output table, with one row per range that received a value.</summary>
    DataTable BuildResultTable(double[] sums, int[] counts)
    {
        var table = new DataTable();
        table.Columns.Add("Prof Evaluar", typeof(double));
        table.Columns.Add("Profundidad", typeof(double));
        table.Columns.Add("Promedio", typeof(double));
        table.Columns.Add("Sumatoria", typeof(double));
        table.Columns.Add("n", typeof(double));

        for (int k = 0; k < counts.Length; k++)
        {
            if (counts[k] == 0)
            {
                continue; // empty ranges are not part of the result
            }

            // "Promedio" stays 0: it is not computed by this calculation.
            table.Rows.Add(Rango.Rows[k][0], Rango.Rows[k][1], 0, sums[k], counts[k]);
        }

        return table;
    }

    /// <summary>
    /// Processes rows [FromRow, ToRow) of one stripe: each non-DBNull Prop cell is added to the
    /// first range whose bound is greater than the matching Prof cell.
    /// </summary>
    void CalculatePart(Part part, double[] bounds)
    {
        try
        {
            int cols = Prop.Columns.Count;
            int steps = cols * (part.ToRow - part.FromRow);
            int step = 0;

            for (int i = part.FromRow; i < part.ToRow; i++)
            {
                DataRow propRow = Prop.Rows[i];
                DataRow profRow = Prof.Rows[i];

                for (int j = 0; j < cols; j++)
                {
                    object propCell = propRow[j];
                    if (propCell != DBNull.Value)
                    {
                        double prop_celda = Convert.ToDouble(propCell);
                        double prof_celda = Convert.ToDouble(profRow[j]);

                        for (int k = 0; k < bounds.Length; k++)
                        {
                            if (prof_celda < bounds[k])
                            {
                                part.Sums[k] += prop_celda;
                                part.Counts[k] += 1;
                                break; // only the first matching range receives the value
                            }
                        }
                    }

                    part.Progress = ++step / (float)steps;
                    if (cancellation.IsCancellationRequested)
                    {
                        return; // keep what was accumulated so far
                    }
                }
            }

            // Also covers stripes without any cells, which never enter the loop above.
            part.Progress = 1.0f;
        }
        catch (Exception e)
        {
            part.Exception = e;
        }
    }

    /// <summary>
    /// Overall progress in the range 0..1, averaged over all stripes.
    /// Returns 0 while no run has been set up yet.
    /// </summary>
    public float Progress()
    {
        Part[] snapshot = parts;
        if (snapshot == null || snapshot.Length == 0)
        {
            return 0.0f;
        }

        float sum = 0.0f;
        foreach (var part in snapshot)
        {
            sum += part.Progress;
        }
        return sum / snapshot.Length;
    }
}

以下代码是在Form中使用上述类的示例。您可能需要稍作调整。

// Example form that starts/stops a ParallelCalculation with a single button and
// polls its progress with a UI timer.
partial class MyForm {

    Button btnStartStop;
    ProgressBar progressBar;

    // Do this somewhere:
    // btnStartStop.Click += BtnStartStop_Click;

    int threads = 0;              // 0 means "The number of CPU cores"
    DataTable Prof, Prop, Rango;  // You have to provide these values

    // The final results will be stored here:
    DataTable Result;

    // The error reported by the last run, or null if it ended without one:
    Exception LastError;

    CancellationTokenSource cancellation;
    ParallelCalculation calculation;
    System.Windows.Forms.Timer progressTimer;

    // Toggles between starting a new calculation and cancelling the running one.
    void BtnStartStop_Click(object sender, EventArgs e)
    {
        if (calculation != null)
        {
            cancellation.Cancel();
        }
        else
        {
            StartCalculation();
        }
    }

    // Creates the per-run objects (token source, calculation, progress timer) and starts the run.
    void StartCalculation()
    {
        LastError = null;

        cancellation = new CancellationTokenSource();
        calculation = new ParallelCalculation { Prof = this.Prof, Prop = this.Prop, Rango = this.Rango };
        calculation.Run(Finished, cancellation.Token, threads);

        progressBar.Value = 0;
        progressTimer = new System.Windows.Forms.Timer(components) { Interval = 100 };
        progressTimer.Tick += ProgressTimer_Tick;
        progressTimer.Start();

        UpdateUI();
    }

    // Completion callback of the calculation. It is marshalled to the UI thread
    // with BeginInvoke before any control is touched.
    void Finished(DataTable table, Exception e)
    {
        BeginInvoke((Action)delegate
        {
            Result = table;
            LastError = e; // keep the failure instead of silently dropping it

            // The timer is created per run, so release it when the run ends.
            progressTimer.Stop();
            progressTimer.Tick -= ProgressTimer_Tick;
            progressTimer.Dispose();
            progressTimer = null;

            // Only show the final progress of a run that ended without an error.
            if (e == null)
            {
                progressBar.Value = (int)(calculation.Progress() * 100);
            }

            cancellation.Dispose();
            cancellation = null;
            calculation = null;

            UpdateUI();
        });
    }

    // Polls the running calculation and mirrors its progress in the progress bar.
    private void ProgressTimer_Tick(object sender, EventArgs e)
    {
        if (calculation != null)
        {
            progressBar.Value = (int)(calculation.Progress() * 100);
        }
    }

    // The button reads "Stop" while a calculation is running and "Start" otherwise.
    void UpdateUI()
    {
        btnStartStop.Text = calculation == null ? "Start" : "Stop";
    }
}