Question

我有一个很大的随机值矩阵（例如 200,000 x 6,000），取值介于 0 到 1 之间，名为 'allGSR'。我使用以下代码创建了一个逻辑数组（？），其中 1 表示小于 .05 的数字：

% Logical mask: true wherever an entry of allGSR is below the .05 cutoff.
sig = allGSR < .05;

我想要做的是返回一个大小为 1 x 200,000 的数组，称为 maxSIG，其中每个元素表示对应行中连续 1 的最大个数。例如，如果在第 1 行中，第 3-6 列都是 1，那么就是连续 4 个；如果第 100-109 列都是 1，那么就是连续 10 个；如果这就是该行中连续 1 的最大个数，我希望 maxSIG 的第一列的值为 '10'。

我一直在使用for循环，if语句和计数器;这是丑陋乏味的，并且想知道是否有更简单或更有效的方式。

感谢您的任何见解。

编辑：哎呀，应该分享循环。编辑2：所以我只用较小的（100 x 6,000）矩阵写出了我的基本代码。这段代码应该运行。很抱歉给您带来不便。

% Build a small demo data set: one row per sample, GSR random values per row.
GSR = 6000;
samples = 100;
allGSR = zeros(samples, GSR);
for row = 1:samples
    allGSR(row, :) = rand(1, GSR);  % draw the row directly as 1xGSR
end

% Flag every entry that falls under the cutoff (.05 can be replaced by whatever).
abovethreshold = allGSR < .05;

% For each row, record the length of its longest streak of flagged entries.
countSIG = zeros(samples, 1);
for row = 1:samples
    streak = 0;   % length of the streak ending at the current column
    longest = 0;  % longest streak seen so far in this row
    for col = 1:GSR
        if abovethreshold(row, col)
            streak = streak + 1;
            longest = max(longest, streak);
        else
            streak = 0;
        end
    end
    countSIG(row, 1) = longest;
end

Answer 1

以下是使用 diff、find 和 accumarray 的一种方法 -

% Longest run of true values in each row of abovethreshold (diff/find/accumarray).
% The row count is taken from abovethreshold itself, so the snippet does not
% depend on a separately defined 'samples' variable.
nRows = size(abovethreshold, 1);
append_col = zeros(nRows, 1);
% Pad every row with a zero on both sides so each run produces a +1 step where
% it starts and a -1 step just past its end. Transposing makes find (which scans
% column by column) list the steps grouped by original row, in matching order.
df = diff([append_col abovethreshold append_col], [], 2).';
[R1, C1] = find(df == 1);    % R1: run start position, C1: row of abovethreshold
[R2, ~]  = find(df == -1);   % R2: position just past the run end, same order as R1
% Run length is end minus start; keep the maximum per row. Passing the output
% size leaves rows that contain no run at the default fill value 0.
out = accumarray(C1, R2 - R1, [nRows 1], @max);

在上面发布的代码中，我们用 abovethreshold 创建了一个很宽的数组，然后对其进行转置。从性能的角度来看，转置操作可能不是最好的选择。所以，我们可以不转置，而是通过对索引排序来完成配对，避免实际移动数据，就像这样 -

% Longest run of true values in each row of abovethreshold, without transposing.
% The row count is taken from abovethreshold itself, so the snippet does not
% depend on a separately defined 'samples' variable.
nRows = size(abovethreshold, 1);
append_col = zeros(nRows, 1);
% Pad every row with a zero on both sides so each run produces a +1 step where
% it starts and a -1 step just past its end.
df = diff([append_col abovethreshold append_col], [], 2);
[R1, C1] = find(df == 1);    % R1: row, C1: run start position
[R2, C2] = find(df == -1);   % R2: row, C2: position just past the run end
% find scans column by column, so starts and ends are not grouped by row here.
% Sorting both lists by row (sort keeps equal keys in their original order)
% lines up the k-th start of a row with the k-th end of that row.
[rowOfRun, idx1] = sort(R1);
[~, idx2] = sort(R2);
% Run length is end minus start; keep the maximum per row. Passing the output
% size leaves rows that contain no run at the default fill value 0.
out = accumarray(rowOfRun, C2(idx2) - C1(idx1), [nRows 1], @max);

Answer 2

如果您担心大型数组上的内存分配、速度等问题，我会用 C++ 实现相同的基本算法。把它放在类似 myfunction.cpp 这样的文件中，并用 mex -largeArrayDims myfunction.cpp 编译。

然后，您可以在 MATLAB 中使用 counts = myfunction(allGSR, .05); 来调用它。

除了确认它能够编译之外，我没有对它做过其他测试。

#include "mex.h"
#include "matrix.h"

void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
  if(nrhs != 2)
    mexErrMsgTxt("Invalid number of inputs.  Shoudl be 2 input argument.");
  if(nlhs != 1)
    mexErrMsgTxt("Invalid number of outputs.  Should be 1 output arguments.");
  if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1]))
    mexErrMsgTxt("First two arguments are not doubles");

  const mxArray *input_array = prhs[0];
  const mxArray *threshold_array = prhs[1];
  size_t input_rows = mxGetM(input_array);
  size_t input_cols = mxGetN(input_array);
  size_t threshold_rows = mxGetM(threshold_array);
  size_t threshold_cols = mxGetN(threshold_array);
  if(threshold_rows != 1 || threshold_cols != 1)
    mexErrMsgTxt("threshold array should be a scalar");

  mxArray *output_array = mxCreateDoubleMatrix(1, input_rows, mxREAL);  
  double *output_data = mxGetPr(output_array);
  double *input_data  = mxGetPr(input_array);
  double threshold = *mxGetPr(threshold_array);


  for(int z = 0; z < input_rows; z++) {
    int count = 0;
    int max_count = 0;
    for(int a = 0; a < input_cols; a++) {
      if(input_data[z + a * input_rows] < threshold) {
        count++;
      } else {
        if(count > max_count)
           max_count = count;
        count = 0;
      }
    }
    if(count > max_count)
      max_count = count;
    output_data[z] = max_count;
  }

  plhs[0] = output_array;
}

我不确定您是要检查高于还是低于阈值。无论是哪一种，您都可以把 input_data[z + a * input_rows] < threshold 中的比较运算符换成您需要的运算符。

Answer 3

这里是一个单行写法，不过由于 cellfun 本质上是一个循环，速度会比较慢：

% Longest run of below-threshold values in each row of allGSR, one result per row.
% - regionprops is given the logical mask (allGSR < .05), not the raw values.
% - mat2cell splits the mask into one cell per row, sized from allGSR itself.
% - struct2cell/cell2mat collect the Area of every connected segment in the row
%   (getfield on a struct array would return only the first segment's Area).
% - The leading 0 makes rows without any segment yield 0 instead of an empty result.
maxSIG = cellfun(@(x) max([0, cell2mat(struct2cell(regionprops(x, 'Area')))]), ...
    mat2cell(allGSR < .05, ones(size(allGSR, 1), 1), size(allGSR, 2)));

图像处理工具箱中的 regionprops 函数可以在逻辑矩阵中标识出由 1 组成的连通区域。通过对矩阵的每一行分别进行操作，并只返回 Area 属性，我们就得到了每行中每个由 1 组成的连通段的长度。然后 max 函数会选出每一行中最长的那个长度，也就是您要查找的结果。

注意：必须调用 mat2cell 将 allGSR 按行拆分成元胞数组，这样才能用 cellfun 对每一行分别进行处理。

返回满足条件的行中的最大序列号（MATLAB）

3 个答案: