我正在尝试使用RccParrallel来估计两个3D矩阵的行之间的距离并返回一个新的矩阵。我看到了Parallel Distance Matrix Calculation使用parallelFor
的示例,但这些计算来自具有固定大小的单个matrix
。
例如,假设我有两个矩阵amat
和bmat
,它们中的nrow
可能会不同,但是ncol
始终是{{ 1}}。在R中,这些可能看起来像:
3
在此示例中,预期输出为set.seed(10); amat <- matrix(rnorm(9, 2, 0.5), ncol = 3)
set.seed(50); bmat <- matrix(rnorm(9, 2, 0.5), ncol = 3)
的{{1}}和5列(1 matrix
的{{1}}索引,2:4 {{1 }}的值为nrow = amat.row()*bmat.row() = 9
,矩阵行之间的欧式距离为5。
row
这是我到目前为止在amat
示例中得到启发的代码
row
是否知道如何使用RcppParallel来实现此目的?显然,我使用并行是因为bmat
和 points X Y Z distance
[1,] 0 1.579198 1.136198 1.704544 0.7737024
[2,] 0 2.274835 2.262075 2.180414 1.0006478
[3,] 0 2.016499 1.861068 2.487795 1.1036122
[4,] 1 2.274835 2.262075 2.180414 0.5282677
[5,] 1 2.016499 1.861068 2.487795 0.7362889
[6,] 1 1.579198 1.136198 1.704544 1.0692094
[7,] 2 1.579198 1.136198 1.704544 1.2079720
[8,] 2 2.274835 2.262075 2.180414 1.3836957
[9,] 2 2.016499 1.861068 2.487795 1.5157243
中的RcppParallel
接近于一千万。我在使用// [[Rcpp::depends(RcppParallel)]]
#include <RcppParallel.h>
using namespace RcppParallel;
struct Mdistance : public Worker { //function object
// input 3D-matrix
const RMatrix<double> amat;
const RMatrix<double> bmat;
// output matrix to write to
RMatrix<double> rmat;
// initialize from Rcpp input and output matrixes
Mdistance(const NumericMatrix amat, const NumericMatrix bmat, NumericMatrix rmat)
: amat(amat), bmat(bmat), rmat(rmat) {}
// function call operator that work for the specified range (begin/end) #Not sure of this part
void operator()(std::size_t begin, std::size_t end) {
for (std::size_t i = 0; i < amat.nrow(); i++) {
for (std::size_t j = 0; j < bmat.nrow(); j++) {
// write to output matrix
rmat((i + (j * amat.nrow())), 0) = i + 1; //Row index of amat
rmat((i + (j * amat.nrow())), 1) = bmat(j, 0); //Value of column 0 of bmat
rmat((i + (j * amat.nrow())), 2) = bmat(j, 1); //Value of column 1 of bmat
rmat((i + (j * amat.nrow())), 3) = bmat(j, 2); //Value of column 2 of bmat
rmat((i + (j * amat.nrow())), 4) = sqrt((pow(bmat(j,0) - amat(i,0), 2.0) + pow(bmat(j, 1) - amat(i, 1), 2.0) + pow(bmat(j, 2) - amat(i, 2), 2.0))); //Euclidean distance between rows
}
}
}
};
// [[Rcpp::export]]
NumericMatrix Mdistance_parallel(NumericMatrix amat, NumericMatrix bmat) {
// allocate the matrix we will return
NumericMatrix rmat((amat.nrow()*bmat.nrow()), 5);
// create the worker
Mdistance Mdistance(amat, bmat, rmat);
// call it with parallelFor
parallelFor(0, (amat.nrow()*bmat.nrow()), MDistance);
return rmat;
}
中基于nrow
的其他例程。但是,这需要很长时间(> 1天),而且似乎不稳定。
谢谢...
编辑
这是我仅使用Rcpp的示例
amat
答案 0 :(得分:0)
我已经解决了。但是,这是正确的,为此的内存分配太大。
这是解决方案。
// [[Rcpp::depends(RcppParallel)]]
#include <Rcpp.h>
#include <RcppParallel.h>
using namespace Rcpp;
using namespace RcppParallel;
struct Mdistance : public Worker { //function object
// input 3D-matrix
const RMatrix<double> amat;
const RMatrix<double> bmat;
// output matrix to write to
RMatrix<double> rmat;
// initialize from Rcpp input and output matrixes
Mdistance(const NumericMatrix amat, const NumericMatrix bmat, NumericMatrix rmat)
: amat(amat), bmat(bmat), rmat(rmat) {}
// function call operator that work for the specified range (begin/end) #Not sure of this part
void operator()(std::size_t begin, std::size_t end) {
for (std::size_t i = begin; i < end; i++) {
for (std::size_t j = 0; j < bmat.nrow(); j++) {
// write to output matrix
rmat((i + (j * amat.nrow())), 0) = i + 1; //Row index of amat
rmat((i + (j * amat.nrow())), 1) = bmat(j, 0); //Value of column 0 of bmat
rmat((i + (j * amat.nrow())), 2) = bmat(j, 1); //Value of column 1 of bmat
rmat((i + (j * amat.nrow())), 3) = bmat(j, 2); //Value of column 2 of bmat
rmat((i + (j * amat.nrow())), 4) = sqrt((pow(bmat(j,0) - amat(i,0), 2.0) + pow(bmat(j, 1) - amat(i, 1), 2.0) + pow(bmat(j, 2) - amat(i, 2), 2.0))); //Euclidean distance between rows
}
}
}
};
// [[Rcpp::export]]
NumericMatrix Mdistance_parallel(NumericMatrix amat, NumericMatrix bmat) {
// allocate the matrix we will return
NumericMatrix rmat((amat.nrow()*bmat.nrow()), 5);
// create the worker
Mdistance Mdistance(amat, bmat, rmat);
// call it with parallelFor
parallelFor(0, amat.nrow(), Mdistance);
return rmat;
}
/*** R
set.seed(10); amat <- matrix(rnorm(9, 2, 0.5), ncol = 3)
set.seed(50); bmat <- matrix(rnorm(9, 2, 0.5), ncol = 3)
Mdistance_parallel(amat, bmat)
*/