RcppParallel代码比纯Rcpp代码运行得慢

时间:2016-02-03 20:40:49

标签: r rcpp

我想计算特定原点到一组其他点的距离。我尝试使用RcppParallel并行化我的代码,但并行化代码的运行速度似乎比纯Rcpp编写的代码慢得多。

下面是可以直接在RStudio中编译的代码:

#include <algorithm>
#include <cmath>
#include <RcppArmadillo.h>
#include <RcppParallel.h>
#include <functional>
using namespace RcppParallel;

// Define worker to be used with RcppParallel
struct ComputeDistances : public Worker{

  // Training data
  const RMatrix<double> trainXs;

  // Point to compute distances from
  const RVector<double> point;

  // Destination vector
  RVector<double> distances;

  // initialize with source and destination
  ComputeDistances(const Rcpp::NumericMatrix trainXs, const Rcpp::NumericVector point, Rcpp::NumericVector distances)
    : trainXs(trainXs), point(point), distances(distances) {}

  void operator()(std::size_t begin, std::size_t end) {

    for (std::size_t i = begin; i < end; i++){


      RMatrix<double>::Row some_point = trainXs.row(i);
      std::vector<double> temp(some_point.length());

      // Compute Euclidean Distance
      std::transform(point.begin(), point.end(), some_point.begin(), temp.begin(), std::minus<int>());
      std::transform(temp.begin(), temp.end(), temp.begin(), temp.begin(), std::multiplies<int>());
      int dist = sqrt(std::accumulate(temp.begin(), temp.end(), 0));
      distances[i] = dist;

    }

  }

};

// Serial reference implementation: returns a vector whose i-th element is the
// Euclidean distance between `point` and row i of `data`.
// [[Rcpp::export]]
arma::vec compdist(arma::mat data, arma::rowvec point){

  const arma::uword n_points = data.n_rows;
  arma::vec result = arma::zeros(n_points);

  for (arma::uword r = 0; r < n_points; ++r){
    // Sum of squared coordinate differences for row r, then its square root
    const double squared_norm = arma::sum(arma::pow((data.row(r) - point),2));
    result[r] = sqrt(squared_norm);
  }

  return result;

}

// Parallel variant: converts the Armadillo inputs to Rcpp containers, runs the
// ComputeDistances worker over all rows with parallelFor, and converts the
// result back to an Armadillo vector.
// [[Rcpp::export]]
arma::vec compdistParallel(arma::mat data, arma::rowvec point){

    // The worker's constructor takes Rcpp containers, so convert the inputs first
    Rcpp::NumericMatrix rcppData = Rcpp::as<Rcpp::NumericMatrix>(Rcpp::wrap(data));
    Rcpp::NumericVector rcppPoint = Rcpp::as<Rcpp::NumericVector>(Rcpp::wrap(point));

    // One output slot per row of the input matrix
    Rcpp::NumericVector rcppOut(data.n_rows);

    ComputeDistances worker(rcppData, rcppPoint, rcppOut);
    parallelFor(0, rcppData.nrow(), worker);

    return Rcpp::as<arma::vec>(Rcpp::wrap(rcppOut));
}

以下是R中的基准测试代码:

library(rbenchmark)

# 100000 x 100 matrix of standard-normal draws: one candidate point per row
data <- matrix(rnorm(10000000), nrow = 100000, ncol = 100)
# Single 1 x 100 origin point
point <- matrix(rnorm(100), nrow = 1, ncol = 100)

# Time the serial and the parallel implementation, 3 replications each
res <- benchmark(compdist(data, point),
                 compdistParallel(data, point),
                 replications = 3)

得到的结果如下:

> res
                           test replications elapsed relative user.self sys.self user.child sys.child
1         compdist(data, point)            3   0.164    1.000     0.097    0.068          0         0
2 compdistParallel(data, point)            3   0.316    1.927     0.503    0.145          0         0

所以我想知道,为什么我的代码在并行化之后反而变慢了?是我的实现有问题吗?

0 个答案:

没有答案