Question

我可重复的R例子：

# Reproducible example. For each of the 1250 start positions i, walk the
# 250-element window f[i], ..., f[i + 249] and store in p[i, j] the relative
# change of the j-th window element against the current reference value.
# Every time that change exceeds 2.6 % the hit is counted in count[i] and the
# element that triggered it becomes the new reference.
f <- runif(1500, 10, 50)
p <- matrix(0, nrow = 1250, ncol = 250)
count <- rep(0, 1250)
for (i in seq_len(1250)) {
  ref <- f[i]
  for (j in seq_len(250)) {
    p[i, j] <- f[i + j - 1] / ref - 1
    # Missing ratios are skipped. is.na() is TRUE for both NA and NaN, whereas
    # comparing against the string "NaN" yields NA for NA input and makes
    # if() fail.
    if (!is.na(p[i, j]) && p[i, j] > 0.026) {
      count[i] <- count[i] + 1
      ref <- f[i + j - 1]
    }
  }
}

更准确地说，我有一套600个f系列，这个代码每个f系列运行200次。目前我正在循环中进行迭代，大多数操作都是按元素进行的。我的随机变量是f，条件if(p[i,j] > (0.026))和数字0.026本身。

通过将代码矢量化并使用函数（特别是apply系列），应该可以大幅缩短运行时间，但我对apply系列不太熟悉，希望能得到一些建议，以便朝着正确的方向前进。

Answer 1

把这个for循环改写成Rcpp相当容易。我只是把您的代码直接复制粘贴到Rcpp中，没有检查其正确性。如有出入，请告诉我。fCpp会返回一个包含p和c的列表。

# Rcpp port of the nested R loop. fCpp(f) returns an unnamed list holding the
# 1250 x 250 matrix p of relative changes and the vector c of per-row counts.
# f must contain at least 1250 + 250 - 1 = 1499 elements.
Rcpp::cppFunction('List fCpp(NumericVector f) {
    const int n = 1250;  // number of start positions (rows of p)
    const int k = 250;   // window length (columns of p)

    // The last window reads f[n + k - 2]; reject shorter input instead of
    // reading past the end of the vector.
    if (f.size() < n + k - 1) {
        stop("f must have at least %d elements", n + k - 1);
    }

    NumericMatrix p(n, k);
    NumericVector c(n);

    for (int i = 0; i < n; i++) {
        double ref = f[i];
        for (int j = 0; j < k; j++) {
            // Zero-based f[i + j] is the element the R loop reads as
            // f[i + j - 1], so column 0 of p is always 0.
            const double cur = f[i + j];
            p(i, j) = cur / ref - 1;
            // Any comparison involving NaN is false, so NaN ratios are
            // skipped here without a separate test.
            if (p(i, j) > 0.026) {
                c[i] = c[i] + 1;
                ref = cur;
            }
        }
    }
    return List::create(p, c);
}')

**基准测试**

# Fix the RNG seed so that the benchmark input f is reproducible.
set.seed(1)
# 1500 uniform draws between 10 and 50.
f = runif(1500,10,50)

# Reference R implementation of the nested-loop counter.
#
# Args:
#   f          numeric vector with at least n + k - 1 elements
#   n          number of start positions (rows of p); default 1250
#   k          window length (columns of p); default 250
#   threshold  relative rise over the current reference that counts as a hit;
#              default 0.026
# Returns:
#   An unnamed list: the n x k matrix p of relative changes and the numeric
#   vector of hit counts per start position.
f1 <- function(f, n = 1250, k = 250, threshold = 0.026) {
  # The last window reads f[n + k - 1]; fail early with a clear message.
  stopifnot(length(f) >= n + k - 1)
  p <- matrix(0, nrow = n, ncol = k)
  count <- rep(0, n)
  for (i in seq_len(n)) {
    ref <- f[i]
    for (j in seq_len(k)) {
      p[i, j] <- f[i + j - 1] / ref - 1
      # is.na() covers NA and NaN; the former comparison with the string "NaN"
      # returned NA for NA input and made if() fail.
      if (!is.na(p[i, j]) && p[i, j] > threshold) {
        count[i] <- count[i] + 1
        # The element that triggered the hit becomes the new reference.
        ref <- f[i + j - 1]
      }
    }
  }
  list(p, count)
}


# Run each implementation 10 times on the same input f; with unit="relative"
# the timings are printed as ratios (see the "Unit: relative" output below).
microbenchmark::microbenchmark(fCpp(f), f1(f), times=10L, unit="relative")
Unit: relative
    expr      min       lq     mean   median       uq      max neval
 fCpp(f)   1.0000   1.0000   1.0000   1.0000   1.0000   1.0000    10
   f1(f) 785.8484 753.7044 734.4243 764.5883 718.0868 644.9022    10

fCpp(f)和f1(f)返回的值基本相同，区别在于f1返回的矩阵p的第1列全为0。

# Time a single call of each implementation and keep the results;
# element [3] of system.time() is the elapsed time.
system.time(a <- f1(f))[3]
#elapsed 
#    2.8 
system.time(a1 <- fCpp(f))[3]
#elapsed 
#      0 
# Compare the first list elements (the p matrices) of both results.
all.equal( a[[1]], a1[[1]])
#[1] "Mean relative difference: 0.7019406"
# Compare the second list elements (the per-row counts).
all.equal( a[[2]], a1[[2]])
#[1] TRUE

Answer 2

这是一个使用while的实现，虽然它比嵌套的for循环花费的时间更长，这有点反直觉。

# Window-based variant of the nested loop: each window is scanned with which()
# inside a while loop instead of element by element.
#
# Args (all optional; the defaults reproduce the original hard-coded setup):
#   n          length of the simulated series
#   d          window length
#   threshold  relative rise over the current reference that counts as a hit
# Returns:
#   An unnamed list: the (n - d) x d matrix of relative changes (one window
#   per row, oldest value first) and the vector of hit counts per window.
f1 <- function(n = 1500, d = 250, threshold = 0.026) {
  stopifnot(d >= 1, n >= d)
  f <- runif(n, 1, 5)
  # embed() lists every window newest-first; reverse the columns so that
  # column 1 holds the window start, then drop the last window to keep
  # n - d rows.
  f <- embed(f, d)[, d:1, drop = FALSE]
  f <- f[-(n - d + 1), , drop = FALSE]
  count <- rep(0, n - d)
  for (i in seq_len(n - d)) {
    win <- f[i, ]
    tem <- win / win[1] - 1
    # Position of the first hit, or NA when the window contains none.
    ti <- which(tem > threshold)[1]
    while (!is.na(ti)) {
      count[i] <- count[i] + 1
      if (ti == d) {
        break
      }
      # Re-base everything after the hit on the value that triggered it and
      # look for the next hit strictly after ti.
      rest <- (ti + 1):d
      tem[rest] <- win[rest] / win[ti] - 1
      ti <- ti + which(tem[rest] > threshold)[1]
    }
    f[i, ] <- tem
  }
  list(f, count)
}

# Time one full run of f1(); the commented lines below are the recorded output.
system.time(f1())

#elapsed 
#6.365

Answer 3

@ajmartin，你的逻辑更好，减少了我尝试的迭代次数。以下是R代码的改进版本：

# Vectorised counter: for every window of length d, count how often the series
# rises more than `threshold` above the current reference value, re-basing the
# reference on each hit.
#
# Args (all optional; the defaults reproduce the original hard-coded setup):
#   n          length of the simulated series
#   d          window length
#   threshold  relative rise over the current reference that counts as a hit
# Returns:
#   An unnamed list: the simulated series f and the vector of hit counts for
#   the n - d windows.
f1 <- function(n = 1500, d = 250, threshold = 0.026) {
  stopifnot(d >= 1, n >= d)
  f <- runif(n, 1, 5)
  count <- rep(0, n - d)
  for (i in seq_len(n - d)) {
    # Work on a local copy of the window so that positions 1..d can be used
    # for both the values and the ratios.
    win <- f[i:(i + d - 1)]
    tem <- win / win[1] - 1
    # Position of the first hit, or NA when the window contains none.
    ind <- which(tem > threshold)[1]
    while (!is.na(ind)) {
      count[i] <- count[i] + 1
      if (ind == d) {
        break
      }
      # Re-base everything after the hit on the value that triggered it and
      # look for the next hit strictly after ind.
      rest <- (ind + 1):d
      tem[rest] <- win[rest] / win[ind] - 1
      ind <- ind + which(tem[rest] > threshold)[1]
    }
  }
  list(f, count)
}

# Time one full run of f1(); element [3] of system.time() is the elapsed time.
# The commented lines below are the recorded output.
system.time(f1())[3]
# elapsed 
#    0.09

在Rcpp中实现此功能将进一步缩短运行时间，但由于我当前的计算机没有管理员权限，因此无法安装Rtools。希望在此期间这个版本能有所帮助。

使用嵌套for循环改善R的运行时间

3 个答案: