Question

// First C# port of the MATLAB loop. For every width in var_width it builds one
// row of values (one entry per processed threshold) and appends it to var_measure.
// n is the product of the outer count and the first inner count of varRatio
// (presumably the total element count of a rectangular matrix - TODO confirm).
int n = varRatio.Count * varRatio[0].Count;
        // Scratch buffers, each as long as var_ratio_ne; reused by every iteration below.
        double[] y_0 = new double[var_ratio_ne.Count];
        double[] y_n = new double[y_0.Length];
        double[] var_map = new double[y_0.Length];
        double[] var_fa_map = new double[y_0.Length];
        for (int j = 0; j < var_width.Count; j++)
        {
            List<double> tempRow = new List<double>();
            // y_0: every ratio shifted by the first threshold and scaled by the current width.
            for (int index = 0; index < var_ratio_ne.Count; index++)
            {
                y_0[index] = ( (var_ratio_ne[index] - var_thr[0]) / var_width[j]);
            }
            // Amount subtracted from y_0 per threshold step, in units of the current width.
            double inc = delta / var_width[j];
            for (int i = 0; i < var_thr.Count; i++)
            {
                // Stop at the first threshold that is not below curr_max; tempRow then
                // holds fewer than var_thr.Count entries.
                if (var_thr[i] >= curr_max)
                {
                    break;
                }
                // Element-wise pass, started once per threshold: shift, clamp to [0, 1],
                // then weight by not_edge_map. Each k writes only its own slots.
                Parallel.For(0, y_0.Length, k =>
                {
                    y_n[k] = y_0[k] - i*inc;
                    var_map[k] = Math.Min(Math.Max(y_n[k], 0), 1);
                    var_fa_map[k] = (not_edge_map[k]*var_map[k]);
                });
                // Sum of the weighted values divided by n.
                tempRow.Add(var_fa_map.Sum() / n);
            }
            var_measure.Add(tempRow);
        }

这是我正在转换的matlab代码：

% Reference implementation: fills var_measure(i,j) for every threshold index i
% and every width index j.
curr_max = max(var_ratio(:));   % largest value anywhere in var_ratio
N = numel(var_ratio);           % total number of elements in var_ratio
for j = 1:numel(var_width)
    % Ratios shifted by the first threshold and scaled by the current width.
    y_0 = (var_ratio_ne - var_thr(1))/var_width(j);
    % Amount subtracted from y_0 per threshold step, in units of the current width.
    inc = delta/var_width(j);
    % Working copy of the weights; it is shrunk together with y_0 below.
    z = not_edge_map;
    for i = 1:numel(var_thr)
        % Stop at the first threshold that is not below curr_max.
        if var_thr(i)>=curr_max
            break;
        end
        y_n = y_0 - (i-1)*inc;
        var_map = min(max(y_n,0),1);   % clamp to [0, 1]
        var_fa_map = z.*var_map;       % weight by the (remaining) not_edge_map entries
        var_measure(i,j) = sum(var_fa_map(:))/N;
        % Optimization for MATLAB: pixels that did not contribute to the false alarm in
        % this iteration (y_n <= 0) will not contribute in the next one either, because
        % the threshold increases (assuming inc > 0), so we can throw them out.
        ii = y_n>0;
        y_0 = y_0(ii);
        z = z(ii);
    end
end

数组的大小为：

N = 673326
var_ratio_ne = 586417
var_thr = 131072
var_width = [15 30 45]
not_edge_map = 586417
var_ratio = 666x1011 double matrix

更新：此更改后我的代码运行速度更快

    //N = numel(var_ratio);
    int n = varRatio.Count * varRatio[0].Count;
    double[] y_0 = new double[var_ratio_ne.Count];

    // Number of leading thresholds to evaluate: everything before the first
    // threshold that is >= curr_max. Neither var_thr nor curr_max changes inside
    // the width loop, so this is computed once instead of once per width.
    // FindIndex returns -1 when no threshold reaches curr_max; in that case every
    // threshold has to be evaluated (allocating an array of length -1 would throw).
    int indexOF = var_thr.FindIndex(x => x >= curr_max);
    if (indexOF < 0)
    {
        indexOF = var_thr.Count;
    }

    for (int j = 0; j < var_width.Count; j++)
    {
        // y_0: every ratio shifted by the first threshold and scaled by the current width.
        for (int index = 0; index < var_ratio_ne.Count; index++)
        {
            y_0[index] = (var_ratio_ne[index] - var_thr[0]) / var_width[j];
        }

        // Amount subtracted from y_0 per threshold step, in units of the current width.
        double inc = delta / var_width[j];

        // One slot per threshold. C# zero-initializes new arrays, so the slots at
        // indexOF and beyond stay 0 without an explicit fill loop.
        double[] tempRow = new double[var_thr.Count];

        // Parallelize over thresholds: each iteration reads the shared inputs and
        // writes only its own tempRow slot, so no synchronization is required.
        Parallel.For(0, indexOF, i =>
        {
            double offset = i * inc;
            double total = 0d;
            for (int k = 0; k < y_0.Length; k++)
            {
                // Shift, clamp to [0, 1], weight by not_edge_map, accumulate.
                total += not_edge_map[k] * Math.Min(Math.Max(y_0[k] - offset, 0), 1);
            }
            tempRow[i] = total / n;
        });

        // Append the row for this width: var_thr.Count entries, zero-padded.
        var_measure.Add(new List<double>(tempRow));
    }

我想我这里的计算方式有问题。虽然我是在调试模式下运行的，但与 MATLAB（约 20 秒）相比，C# 的耗时（以分钟计）还是非常糟糕。
你能帮我做运行时优化吗？我很难理解为什么 MATLAB 的表现会比 C# 代码好这么多。

Answer 1

我建议减少大块内存分配，因此下面这段代码：

// "Before" version: three temporary lists are created and grown element by
// element, one entry per element of y_0, just to produce a single sum.
List<double> y_n = new List<double>();
List<double> var_map = new List<double>();
List<double> var_fa_map = new List<double>();
for (int k = 0; k < y_0.Count; k++)
{
    // Shift by i * inc, clamp to [0, 1], then weight by not_edge_map.
    y_n.Add(y_0[k] - i * inc);
    var_map.Add(Math.Min(Math.Max(y_n[k], 0), 1));
    var_fa_map.Add(not_edge_map[k] * var_map[k]);
}
// Only the sum of the weighted values, divided by n, is kept.
tempRow.Add(var_fa_map.Sum() / n);

变为：

// "After" version: the same shift / clamp / weight expression is accumulated
// directly into a running total, so no temporary lists are allocated.
var total = 0d;
for (int k = 0; k < y_0.Count; k++)
{
    total += (not_edge_map[k] * Math.Min(Math.Max(y_0[k] - i * inc, 0), 1));
}       
// Same result as summing the temporary list: total divided by n.
tempRow.Add(total / n);

在我的测试中，这一改动使运行时间缩短了一半，但你的实际效果可能会有所不同。当然还可以做其他一些优化，例如进一步减少内存分配、合并部分计算步骤，但我需要更有代表性的输入数据才能有效地进行性能分析；例如，我不确定改为并行执行并切换到并发集合是否会带来正面效果。

MATLAB 代码比 C# 代码快得多，这出乎意料

1 个答案: