Question

我必须在R

中计算以下内容

其中kip，c是常量。这样做的一种方法是：

xfun<- function(x,k,p,c){
ghhh<-numeric()
for(i in 1: length(x)){
ghhh[i]<-sum(k/(x[i]-x[1:(i-1)]+c)^p)
}
res<-sum(log(ghhh))
return(res)
}

。但是我可以使用＆＃34;外部＆＃34;来计算它。？那么它会变快吗？

数据如下：

t <- numeric(2000)
t[1]<-0
for (i in 2:2000){
t[i]<- t[i-1]+rexp(1, 0.2)
}
k=0.5; p=1.2; c=0.02

Answer 1

你的等式有点令人困惑。如果i == 1.我不确定在内部和中应该发生什么？从1到0求和？

基于一些猜测（如果我猜错了，你需要调整以下内容），我怀疑你的功能应该纠正到这个：

xfun<- function(x,k,p,c){
  ghhh<-numeric() # it would be better practice to use numeric(length(x) - 1)
  for(i in 1: (length(x) - 1)){
    ghhh[i]<-sum(k/(x[i+1]-x[1:i]+c)^p)
  }
  res<-sum(log(ghhh))
  return(res)
}

t <- numeric(2000)
t[1]<-0
set.seed(42)
for (i in 2:2000){
  t[i]<- t[i-1]+rexp(1, 0.2)
}
k=0.5; p=1.2; c=0.02
xfun(t, k, p, c)
#[1] -1526.102

用outer重写：

xfun1 <- function(x ,k ,p ,c){

  o <- outer(seq_along(x), seq_along(x), function(i, j) {
    res <- numeric(length(i))
    test <- j < i
    res[test] <- k / (x[i[test]] - x[j[test]] + c) ^ p
    res
  })
  sum(log(rowSums(o)[-1]))
}

xfun1(t, k, p, c)
#[1] -1526.102

基准：

library(microbenchmark)
microbenchmark(loop = xfun(t, k, p, c), 
               outer = xfun1(t, k, p, c),
               times = 10)
#Unit: milliseconds
#  expr      min       lq     mean   median       uq      max neval cld
#  loop 186.0454 186.2375 188.9567 187.4005 189.0597 196.6992    10  a 
# outer 263.4137 274.6610 346.4505 344.6918 423.3651 425.2885    10   b

如您所见，对于此大小的数据，outer的解决方案速度并不快。主要原因是R需要为长度为2000 ^ 2的向量分配内存并处理这个大向量。此外，您的简单循环由JIT字节码编译器优化。

如果想要更快，则需要切换到编译语言。幸运的是，这对Rcpp来说相当容易：

library(Rcpp)
library(inline)

cppFunction(
  'double xfun2(const NumericVector x, const double k, const double p, const double c) {
     int n = x.length() - 1;
     NumericVector ghhh(n);
     for (int i = 0; i < n; ++i) {
       for (int j = 0; j <= i ; ++j) {
         ghhh(i) += k / pow(x(i + 1) - x(j) + c, p);
       }
     }
     ghhh = log(ghhh);
     double res;
     res = sum(ghhh);
     return res;
  }')

xfun2(t, k, p, c)
#[1] -1526.102

microbenchmark(loop = xfun(t, k, p, c), 
               outer = xfun1(t, k, p, c),
               Rcpp =  xfun2(t, k, p, c),
               times = 10)
#Unit: milliseconds
#  expr      min       lq     mean   median       uq      max neval cld
#  loop 186.0395 188.7875 189.7487 189.9298 191.6967 192.7213    10  b 
# outer 408.4452 416.7730 432.3356 419.7510 422.4000 559.4279    10   c
#  Rcpp 136.1496 136.1606 136.1929 136.1762 136.2129 136.3089    10 a

如您所见，此大小的数据的速度提升最小（JIT编译真的很棒）。我建议继续你的R循环。

Answer 2

考虑到您实现的逻辑是正确的，您可以尝试并行R功能：

library(foreach)
library(doParallel)

xfun2<- function(x,k,p,c){

  no_cores <- detectCores() - 1
  cl<-makeCluster(no_cores)
  registerDoParallel(cl)      
  ghhh <- foreach(i  = 1: length(x), .combine = c) %dopar% sum(k/(x[i]-x[1:(i-1)]+c)^p)
  res <- sum(log(ghhh))

}

我用x <- rnorm(100000, 1, 0.5)运行它，并行版本几乎快了两倍。您可以阅读有关doParallel包here

的更多信息

复杂的双和使用＆＃34;外部＆＃34;

2 个答案: