使用Rcpp创建大量(大型)向量

时间:2018-11-11 04:06:18

标签: list rcpp

我的函数的输入由两个矩阵 mat1 和 mat2 以及排列次数 B 组成。mat1 和 mat2 都有 m 列,但行数不同。

该函数首先对两个矩阵合并后的行进行随机排列(同时保留列信息)。然后,它对排列后的 mat1 和 mat2 逐列进行比较运算。

以下是我的函数 permute_data 的示例。比较函数 CompareMatCols()(在下面的最小示例中由 vecmin(ColMax(M1), ColMax(M2)) 代替)输出长度为 m 的向量。

问题:初始化输出列表对象的最佳方法是什么?我看过几篇文章指出 push_back 的局限性。B 和 m 的数量级都约为 10000,因此最好能有一种高效的方法。

#include <Rcpp.h>
#include <math.h>
//#include <random> //for std::shuffle

using namespace std;
using namespace Rcpp;

// Column-wise maximum of a numeric matrix.
//
// @param X  Numeric matrix.
// @return   Numeric vector with one entry per column of X holding that
//           column's maximum. When X has zero rows every entry is -Inf
//           (the same value R's max() yields for an empty vector).
//
// Values are compared with std::max, so NaN/NA entries receive no special
// treatment.
// [[Rcpp::export]]
NumericVector ColMax(NumericMatrix X) {
  const int nrow = X.rows();
  const int ncol = X.cols();
  NumericVector out = no_init(ncol);
  for(int j = 0; j < ncol; ++j) {
    // An empty column has no first element to seed from; reading X(0,j)
    // in that case would be an out-of-bounds access.
    double omax = (nrow > 0) ? X(0,j) : R_NegInf;
    // Row 0 is already the seed, so the scan starts at row 1.
    for(int i = 1; i < nrow; ++i){
      omax = std::max(X(i,j),omax);
    }
    out[j] = omax;
  }
  return out;
}

// Element-wise minimum of two numeric vectors.
//
// @param vec1  First numeric vector.
// @param vec2  Second numeric vector.
// @return      Vector whose i-th entry is min(vec1[i], vec2[i]). If the two
//              inputs differ in length, a zero-length vector is returned
//              instead of raising an error.
// [[Rcpp::export]]
NumericVector vecmin(NumericVector vec1, NumericVector vec2) {
  int len = vec1.size();

  // Guard clause: mismatched lengths produce an empty result.
  if(len != vec2.size()) {
    return NumericVector(0);
  }

  NumericVector result = no_init(len);
  for(int idx = 0; idx < len; ++idx) {
    result[idx] = std::min(vec1[idx], vec2[idx]);
  }
  return result;
}

// Permutation resampling over the pooled rows of two matrices.
//
// The rows of mat1 and mat2 are treated as one pool of N1 + N2 rows
// (pooled index 0..N1-1 -> rows of mat1, N1..N1+N2-1 -> rows of mat2).
// For each of the B permutations the pool is shuffled, the first N1 shuffled
// rows form M1 and the remaining N2 rows form M2, and the statistic
// vecmin(ColMax(M1), ColMax(M2)) is stored.
//
// @param mat1  Numeric matrix with N1 >= 1 rows and m columns.
// @param mat2  Numeric matrix with N2 >= 1 rows and the same m columns.
// @param B     Number of permutations (non-negative).
// @return      List of length B; element b is a numeric vector of length m.
//
// The output list is allocated once at its final size and filled by index,
// so no push_back-style growth is involved.
// [[Rcpp::export]]
List permute_data(NumericMatrix mat1,NumericMatrix mat2,int B) {

  const int N1 = mat1.rows();
  const int N2 = mat2.rows();
  const int m = mat1.cols(); //Will be large ~10000 elements

  if(mat2.cols() != m) stop("mat1 and mat2 must have the same number of columns");
  if(N1 < 1 || N2 < 1) stop("mat1 and mat2 must each have at least one row");
  if(B < 0) stop("B must be non-negative");

  List out(B); // Pre-sized; will be large ~10000 elements

  // Pooled row labels to be permuted
  IntegerVector permindx = seq(0,N1+N2-1);
  // Scratch matrices, fully overwritten on every permutation
  NumericMatrix M1 = no_init_matrix(N1,m);
  NumericMatrix M2 = no_init_matrix(N2,m);

  for(int b = 0; b<B; ++b){
    // Permute the N1+N2 pooled row labels
    permindx = sample(permindx,N1+N2); //Use Rcpp's function to work with R's RNG
    for(int j=0; j<m; ++j){
        // First N1 shuffled labels fill M1
        for(int i=0; i<N1; ++i){
          const int src = permindx[i];
          // Labels >= N1 refer to mat2; subtract N1 to get the actual row
          // there (mat2 only has rows 0..N2-1).
          M1(i,j) = (src < N1) ? mat1(src,j) : mat2(src-N1,j);
        }
        // Remaining N2 shuffled labels fill M2
        for(int k=0; k<N2; ++k){
          const int src = permindx[k+N1];
          M2(k,j) = (src < N1) ? mat1(src,j) : mat2(src-N1,j);
        }
    }
    out[b] = vecmin(ColMax(M1),ColMax(M2)); //a vector of length m
  }
  return(out);
}

/***R
# Example call; an R chunk like this is executed by Rcpp::sourceCpp() after
# the C++ code above has been compiled.
set.seed(1)                        # fix R's RNG so the run is reproducible
X = matrix(rnorm(3*5),ncol=5)      # 3 x 5 matrix of standard normal draws
Y = matrix(rnorm(5*5),ncol=5)      # 5 x 5 matrix, same number of columns as X
B = 5                              # number of permutations
res = permute_data(X,Y,B)          # list of B numeric vectors
*/

编辑:添加了一些代码行以回应 @duckmayr 的问题。编辑 2:根据 Dirk 的建议,我提供了一个最小、完整且可验证的示例。

1 个答案:

答案 0 :(得分:0)

最后,我采用的解决方案就是上面问题中给出的方案:

// Permutation resampling over the pooled rows of two matrices.
//
// The rows of mat1 and mat2 are treated as one pool of N1 + N2 rows
// (pooled index 0..N1-1 -> rows of mat1, N1..N1+N2-1 -> rows of mat2).
// For each of the B permutations the pool is shuffled, the first N1 shuffled
// rows form M1 and the remaining N2 rows form M2, and the statistic
// vecmin(ColMax(M1), ColMax(M2)) is stored.
//
// @param mat1  Numeric matrix with N1 >= 1 rows and m columns.
// @param mat2  Numeric matrix with N2 >= 1 rows and the same m columns.
// @param B     Number of permutations (non-negative).
// @return      List of length B; element b is a numeric vector of length m.
//
// The output list is allocated once at its final size and filled by index,
// so no push_back-style growth is involved.
// [[Rcpp::export]]
List permute_data(NumericMatrix mat1,NumericMatrix mat2,int B) {

  const int N1 = mat1.rows();
  const int N2 = mat2.rows();
  const int m = mat1.cols(); //Will be large ~10000 elements

  if(mat2.cols() != m) stop("mat1 and mat2 must have the same number of columns");
  if(N1 < 1 || N2 < 1) stop("mat1 and mat2 must each have at least one row");
  if(B < 0) stop("B must be non-negative");

  List out(B); // Pre-sized; will be large ~5000 elements

  // Pooled row labels to be permuted
  IntegerVector permindx = seq(0,N1+N2-1);
  // Scratch matrices, fully overwritten on every permutation
  NumericMatrix M1 = no_init_matrix(N1,m);
  NumericMatrix M2 = no_init_matrix(N2,m);

  for(int b = 0; b<B; ++b){
    // Permute the N1+N2 pooled row labels
    permindx = sample(permindx,N1+N2); //Use Rcpp's function to work with R's RNG
    for(int j=0; j<m; ++j){
        // First N1 shuffled labels fill M1
        for(int i=0; i<N1; ++i){
          const int src = permindx[i];
          // Labels >= N1 refer to mat2; subtract N1 to get the actual row
          // there (mat2 only has rows 0..N2-1).
          M1(i,j) = (src < N1) ? mat1(src,j) : mat2(src-N1,j);
        }
        // Remaining N2 shuffled labels fill M2
        for(int k=0; k<N2; ++k){
          const int src = permindx[k+N1];
          M2(k,j) = (src < N1) ? mat1(src,j) : mat2(src-N1,j);
        }
    }
    out[b] = vecmin(ColMax(M1),ColMax(M2)); //a vector of length m
  }
  return(out);
}