RccParrallel估计R中两个矩阵的行之间的距离

时间:2019-12-30 04:33:26

标签: c++ r parallel-processing rcpp rcppparallel

我正在尝试使用RccParrallel来估计两个3D矩阵的行之间的距离并返回一个新的矩阵。我看到了Parallel Distance Matrix Calculation使用parallelFor的示例,但这些计算来自具有固定大小的单个matrix

例如,假设我有两个矩阵amatbmat,它们中的nrow可能会不同,但是ncol始终是{{ 1}}。在R中,这些可能看起来像:

3

在此示例中,预期输出为set.seed(10); amat <- matrix(rnorm(9, 2, 0.5), ncol = 3) set.seed(50); bmat <- matrix(rnorm(9, 2, 0.5), ncol = 3) 的{​​{1}}和5列(1 matrix的{​​{1}}索引,2:4 {{1 }}的值为nrow = amat.row()*bmat.row() = 9,矩阵行之间的欧式距离为5。

row

这是我到目前为止在amat示例中得到启发的代码

row

是否知道如何使用RcppParallel来实现此目的?显然,我使用并行是因为bmat points X Y Z distance [1,] 0 1.579198 1.136198 1.704544 0.7737024 [2,] 0 2.274835 2.262075 2.180414 1.0006478 [3,] 0 2.016499 1.861068 2.487795 1.1036122 [4,] 1 2.274835 2.262075 2.180414 0.5282677 [5,] 1 2.016499 1.861068 2.487795 0.7362889 [6,] 1 1.579198 1.136198 1.704544 1.0692094 [7,] 2 1.579198 1.136198 1.704544 1.2079720 [8,] 2 2.274835 2.262075 2.180414 1.3836957 [9,] 2 2.016499 1.861068 2.487795 1.5157243 中的RcppParallel接近于一千万。我在使用// [[Rcpp::depends(RcppParallel)]] #include <RcppParallel.h> using namespace RcppParallel; struct Mdistance : public Worker { //function object // input 3D-matrix const RMatrix<double> amat; const RMatrix<double> bmat; // output matrix to write to RMatrix<double> rmat; // initialize from Rcpp input and output matrixes Mdistance(const NumericMatrix amat, const NumericMatrix bmat, NumericMatrix rmat) : amat(amat), bmat(bmat), rmat(rmat) {} // function call operator that work for the specified range (begin/end) #Not sure of this part void operator()(std::size_t begin, std::size_t end) { for (std::size_t i = 0; i < amat.nrow(); i++) { for (std::size_t j = 0; j < bmat.nrow(); j++) { // write to output matrix rmat((i + (j * amat.nrow())), 0) = i + 1; //Row index of amat rmat((i + (j * amat.nrow())), 1) = bmat(j, 0); //Value of column 0 of bmat rmat((i + (j * amat.nrow())), 2) = bmat(j, 1); //Value of column 1 of bmat rmat((i + (j * amat.nrow())), 3) = bmat(j, 2); //Value of column 2 of bmat rmat((i + (j * amat.nrow())), 4) = sqrt((pow(bmat(j,0) - amat(i,0), 2.0) + pow(bmat(j, 1) - amat(i, 1), 2.0) + pow(bmat(j, 2) - amat(i, 2), 2.0))); //Euclidean distance between rows } } } }; // [[Rcpp::export]] NumericMatrix Mdistance_parallel(NumericMatrix amat, NumericMatrix bmat) { // allocate the matrix we will return NumericMatrix rmat((amat.nrow()*bmat.nrow()), 5); // create the worker Mdistance Mdistance(amat, bmat, rmat); // call it with parallelFor parallelFor(0, (amat.nrow()*bmat.nrow()), MDistance); return rmat; } 中基于nrow的其他例程。但是,这需要很长时间(> 1天),而且似乎不稳定。

谢谢...

编辑

这是我仅使用Rcpp的示例

amat

1 个答案:

答案 0 :(得分:0)

我已经解决了。但是,这是正确的,为此的内存分配太大。

这是解决方案。

// [[Rcpp::depends(RcppParallel)]]
#include <Rcpp.h>
#include <RcppParallel.h>
using namespace Rcpp;
using namespace RcppParallel;

struct Mdistance : public Worker { //function object

  // input 3D-matrix
  const RMatrix<double> amat;
  const RMatrix<double> bmat;

  // output matrix to write to
  RMatrix<double> rmat;

  // initialize from Rcpp input and output matrixes
  Mdistance(const NumericMatrix amat, const NumericMatrix bmat, NumericMatrix rmat)
    : amat(amat), bmat(bmat), rmat(rmat) {}

  // function call operator that work for the specified range (begin/end) #Not sure of this part
  void operator()(std::size_t begin, std::size_t end) {
    for (std::size_t i = begin; i < end; i++) {

        for (std::size_t j = 0; j < bmat.nrow(); j++) {

          // write to output matrix
          rmat((i + (j * amat.nrow())), 0) = i + 1; //Row index of amat
          rmat((i + (j * amat.nrow())), 1) = bmat(j, 0); //Value of column 0 of bmat
          rmat((i + (j * amat.nrow())), 2) = bmat(j, 1); //Value of column 1 of bmat
          rmat((i + (j * amat.nrow())), 3) = bmat(j, 2); //Value of column 2 of bmat
          rmat((i + (j * amat.nrow())), 4) = sqrt((pow(bmat(j,0) - amat(i,0), 2.0) + pow(bmat(j, 1) - amat(i, 1), 2.0) + pow(bmat(j, 2) - amat(i, 2), 2.0))); //Euclidean distance between rows

        }
    }
  }
};

// [[Rcpp::export]]
NumericMatrix Mdistance_parallel(NumericMatrix amat, NumericMatrix bmat) {

  // allocate the matrix we will return
  NumericMatrix rmat((amat.nrow()*bmat.nrow()), 5);

  // create the worker
  Mdistance Mdistance(amat, bmat, rmat);

  // call it with parallelFor
  parallelFor(0, amat.nrow(), Mdistance);

  return rmat;
}

/*** R
set.seed(10); amat <- matrix(rnorm(9, 2, 0.5), ncol = 3)  
set.seed(50); bmat <- matrix(rnorm(9, 2, 0.5), ncol = 3)

Mdistance_parallel(amat, bmat)
*/