我想计算特定原点到一组其他点的距离。我尝试使用RcppParallel
并行化我的代码,但并行化代码的运行速度似乎比纯Rcpp
编写的代码慢得多。
下面是可以在 RStudio 中直接编译的代码:
#include <algorithm>
#include <cmath>
#include <RcppArmadillo.h>
#include <RcppParallel.h>
#include <functional>
using namespace RcppParallel;
// Define worker to be used with RcppParallel
struct ComputeDistances : public Worker{
// Training data
const RMatrix<double> trainXs;
// Point to compute distances from
const RVector<double> point;
// Destination vector
RVector<double> distances;
// initialize with source and destination
ComputeDistances(const Rcpp::NumericMatrix trainXs, const Rcpp::NumericVector point, Rcpp::NumericVector distances)
: trainXs(trainXs), point(point), distances(distances) {}
void operator()(std::size_t begin, std::size_t end) {
for (std::size_t i = begin; i < end; i++){
RMatrix<double>::Row some_point = trainXs.row(i);
std::vector<double> temp(some_point.length());
// Compute Euclidean Distance
std::transform(point.begin(), point.end(), some_point.begin(), temp.begin(), std::minus<int>());
std::transform(temp.begin(), temp.end(), temp.begin(), temp.begin(), std::multiplies<int>());
int dist = sqrt(std::accumulate(temp.begin(), temp.end(), 0));
distances[i] = dist;
}
}
};
// Serial reference implementation: returns, for every row of `data`, the
// Euclidean distance between that row and `point`.
//
// Both arguments are taken by const reference rather than by value so that
// calling the function does not deep-copy the (potentially large) matrix.
// [[Rcpp::export]]
arma::vec compdist(const arma::mat& data, const arma::rowvec& point){
    arma::vec distances(data.n_rows, arma::fill::zeros);
    for (arma::uword i = 0; i < data.n_rows; ++i) {
        // sqrt of the sum of squared coordinate differences for row i.
        distances[i] = std::sqrt(arma::sum(arma::pow(data.row(i) - point, 2)));
    }
    return distances;
}
// Parallel counterpart of the serial distance routine: returns, for every
// row of `data`, the Euclidean distance between that row and `point`, with
// the rows distributed across threads by RcppParallel::parallelFor.
//
// Stops with an R error when `point` does not have one element per column
// of `data`; the worker indexes both with the same column index, so a
// mismatch would otherwise access memory out of bounds.
// [[Rcpp::export]]
arma::vec compdistParallel(const arma::mat& data, const arma::rowvec& point){
    if (point.n_elem != data.n_cols) {
        Rcpp::stop("point must have one element per column of data");
    }

    // The worker operates on Rcpp containers, so convert the Armadillo
    // inputs once up front and allocate the output on the R side.
    Rcpp::NumericMatrix DATA(Rcpp::wrap(data));
    Rcpp::NumericVector POINT(Rcpp::wrap(point));
    Rcpp::NumericVector DISTANCES(data.n_rows);

    ComputeDistances computedistances(DATA, POINT, DISTANCES);
    parallelFor(0, DATA.nrow(), computedistances);

    return Rcpp::as<arma::vec>(Rcpp::wrap(DISTANCES));
}
以下是 R 基准测试代码:
# Benchmark the serial and the parallel distance routines on random data.
library(rbenchmark)

# 100000 observations with 100 coordinates each, plus one query point.
n_obs <- 100000
n_dim <- 100
data <- matrix(rnorm(n_obs * n_dim), nrow = n_obs, ncol = n_dim)
point <- matrix(rnorm(n_dim), nrow = 1, ncol = n_dim)

# Time three replications of each implementation.
res <- benchmark(
  compdist(data, point),
  compdistParallel(data, point),
  replications = 3
)
得到的结果如下:
> res
test replications elapsed relative user.self sys.self user.child sys.child
1 compdist(data, point) 3 0.164 1.000 0.097 0.068 0 0
2 compdistParallel(data, point) 3 0.316 1.927 0.503 0.145 0 0
所以我想知道,为什么我的代码在并行化之后反而变慢了?我的实现有问题吗?