我有以下功能
fun <- cxxfunction(
signature(x="numeric", y="numeric",N="interger", w="numeric", p="numeric"),
plugin="RcppArmadillo",
includes=c("#include <stdlib.h>", "#include <cmath>","#include <numeric>","#include <algorithm>","#include <vector>"),
body='
using namespace Rcpp;
RNGScope scope;
NumericVector xa(x);
NumericVector ya(y);
int Na = as<int>(N);
int n_xa = xa.size(), n_ya = ya.size();
arma::mat wa = Rcpp::as<arma::mat>(w);
Rcpp::NumericMatrix n(p); //one column to each x[i]/y
NumericVector limite(n_xa);
arma::mat z(n_ya, n_xa);
arma::mat beta(n_ya, n_xa);
arma::colvec one(xa.begin(), xa.size());
arma::colvec betaM(ya.begin(), ya.size());
arma::colvec betaP(ya.begin(), ya.size());
arma::colvec Prob(ya.begin(), ya.size());
arma::colvec betaC(ya.begin(), ya.size());
z.col(0) = arma::zeros<arma::mat>(n_ya, 1);
NumericVector nV(n_ya);
NumericVector nT(n_ya);
int i, j, randLR;
for (i=0; i<n_xa; i++) {
for (j=0; j<n_ya; j++) {
z(j,i) = 0;
}
randLR = rand() % n_ya;
// randonly initialize z
z(randLR,i)=1;
limite(i) = i;
one(i) = 1;
}
// the number of ways can be produced
beta = wa * z;
one = beta * one;
int k, l,l2, pos;
NumericMatrix prob_table(n_xa, Na);
NumericMatrix class_table(n_xa, Na);
int oldval;
for(k=0; k<Na; k++){
// limite = f(limite, n_xa);
std::random_shuffle(limite.begin(),limite.end());
for (l=0; l<n_xa; l++) {
l2=limite(l);
// trick, where the z.col came from
betaM = one - wa * z.col(l2);
// sum delta and normalize
Prob = betaM;
std::transform(Prob.begin(), Prob.end(), Prob.begin(), std::bind2nd(std::plus<double>(),1));
std::transform(Prob.begin(), Prob.end(), Prob.begin(), std::bind2nd(std::divides<double>(),std::accumulate(Prob.begin(), Prob.end(), 0.0)));
nV = n(_, l2);
std::transform(nV.begin(), nV.end(), nV.begin(), std::bind2nd(std::divides<double>(),std::accumulate(nV.begin(), nV.end(), 0.0)));
// multipling the the likelihoods
int nP;
for (nP=0; nP<betaP.size(); nP++) {
betaP(nP) = Prob(nP)*nV(nP);
if(z(nP, l2)==1){
oldval = nP;
}
}
// continuing control
if (std::accumulate(betaP.begin(), betaP.end(), 0.0) > 0) {
std::transform(betaP.begin(), betaP.end(), betaP.begin(), std::bind2nd(std::divides<double>(),std::accumulate(betaP.begin(), betaP.end(), 0.0)));
}
else {
// std::transform(betaP.begin(), betaP.end(), betaP.begin(), std::bind2nd(std::plus<double>(),1));
}
// a way to sample according a distribution, based on rogers
double base = ::Rf_runif(0,1);
std::partial_sum (betaP.begin(), betaP.end(), betaC.begin());
int sz = betaC.size();
int idx;
for (idx=0; idx<sz; idx++) {
if(betaC[idx] > base) {
pos = idx;
break;
}
}
if(pos!=oldval){
one = betaM - wa * z.col(l2);
int nP;
for (nP=0; nP<betaP.size(); nP++) {
z(nP, l2) = 0;
}
z(pos, l2) = 1;
one = betaM + wa * z.col(l2);
}
prob_table(l2,k) = betaP(pos);
pos = pos + 1;
class_table(l2,k) = pos;
}
}
// betaP = z.col(l2);
return Rcpp::List::create( Rcpp::Named("beta")=beta, Rcpp::Named("l2")=l2,Rcpp::Named("z")=z, Rcpp::Named("oldval")=oldval, Rcpp::Named("pos")=pos, Rcpp::Named("limite")=limite, Rcpp::Named("n")=nV, Rcpp::Named("betaM")=betaM, Rcpp::Named("betaP")=betaP, Rcpp::Named("Prob")=Prob,Rcpp::Named("prob_table")=prob_table, Rcpp::Named("class_table")=class_table );
'
)
在所有操作系统中,64位,8 GB RAM,我将代码加载为
library(inline)
source("fun.R")
对于一个小例子,该函数在所有系统中运行都很完美,但是当问题的大小增加时,windows和mac的增加似乎要高出几个数量级。
有效的小例子:
varx <- 1:100
vary <- 1:200
w <- matrix(sample(0:1, 200^2, replace=TRUE), 200, 200)
wm <- matrix(abs(rnorm(200*100, 0.5, 0.5)), 200, 100)
system.time(conn <- fun(varx, vary, 5000, w, wm))
仅适用于linux的大型示例:
https://dl.dropboxusercontent.com/u/10712588/var1_teste.rda https://dl.dropboxusercontent.com/u/10712588/var2_teste.rda
load("var1_teste.rda")
load("var2_teste.rda")
varx <- 1:ncol(wl$wm)
vary <- 1:nrow(wl$wm)
system.time(conn <- fun(varx, vary, 5000, w, wl$wm))
在ubuntu上需要30分钟,在Windows或Mac上它需要2天而且没有完成,有人有线索吗?操作系统之间有什么区别吗?
答案 0 :(得分:6)
您是否考虑先使用较小的问题进行测试?
您是否考虑首先重新构建一个现有的RcppArmadillo示例,以检查您测试的所有系统的可行性?
此外,随着更新的Rcpp发布,“Rcpp属性”功能允许您更简单地执行操作。考虑:
R> cppFunction('arma::mat op(arma::colvec x) { return x*x.t(); }',
+ depends="RcppArmadillo")
R> op(1:3)
[,1] [,2] [,3]
[1,] 1 2 3
[2,] 2 4 6
[3,] 3 6 9
R>
我使用一个调用和一个(包装)行创建一个Armadillo C ++函数来生成列向量的外积。