高效地实现双for循环

时间:2018-12-13 10:40:44

标签: r performance for-loop

我是 R 的新手,想知道下面的代码能否以更高效的方式实现?时间序列 x 和 y 的长度约为 5000,并且 h != nrow(q)。

# Reproducible example data: two simulated normal series of length 5000.
# x is drawn before y so the random number stream is consumed in a fixed order.
set.seed(1)
x <- rnorm(n = 5e3, mean = 1)
y <- rnorm(n = 5e3, mean = 2)

# Upper bound of the outer loop index used further down.
h <- 21

# Probability levels and the matching empirical quantiles of each series.
q <- c(0.1, 0.3, 0.5, 0.7, 0.9)
qx <- quantile(x, probs = q)
qy <- quantile(y, probs = q)

# Expand each vector into the data frame of all ordered pairs
# (5 x 5 = 25 rows, columns Var1 and Var2).
qx <- expand.grid(qx, qx)
qy <- expand.grid(qy, qy)
q <- expand.grid(q, q)

# Share of lagged pairs (z[t], z[t + l]) that lie at or below a pair of
# thresholds, minus the product of one row of the grid `q`.
#
# Args:
#   z:  numeric vector.
#   l:  lag; a single number with 0 <= l < length(z).
#   qz: pair of thresholds accessed as qz[[1]] (bound for z[t]) and
#       qz[[2]] (bound for z[t + l]), e.g. one row of a two-column data frame.
#
# Returns:
#   A single number: sum(z[t] <= qz[[1]] and z[t + l] <= qz[[2]]) / (n - l)
#   minus prod(q[l, ]).
#
# `q` is not an argument; it is looked up in the enclosing environment.
# The row of `q` is selected by `l`. Callers pass their loop index as `l`, so
# this equals indexing by that loop index, but no longer requires a variable
# `i` to exist in the workspace (it does not when f is called from inside
# sapply/lapply, where the index is a function argument).
# NOTE(review): selecting the row of `q` by the lag looks unusual - confirm
# that the grid row was not intended instead.
f <- function(z, l, qz) {
  n <- length(z)
  # 1:(n - l) would count backwards for l >= n, so reject such lags up front.
  stopifnot(length(l) == 1L, l >= 0, l < n)
  m <- n - l
  lead_ok <- z[seq_len(m)] <= qz[[1]]
  lag_ok <- z[l + seq_len(m)] <= qz[[2]]
  1 / m * sum(lead_ok * lag_ok) - prod(q[l, ])
}

# Accumulate the squared difference between the statistic of x and the
# statistic of y over every lag i in 1..h and every row j of the quantile grids.
# The accumulator is called `total` so that it does not mask base::sum, and
# seq_len() keeps the loops empty (instead of running over c(1, 0)) when h or
# nrow(q) is zero.
total <- 0
for (i in seq_len(h)) {
  for (j in seq_len(nrow(q))) {
    total <- total + (f(x, l = i, qx[j, ]) - f(y, l = i, qy[j, ]))^2
  }
}
total
# 0.0008698279

非常感谢您!

1 个答案:

答案 0 :(得分:1)

在某些情况下,可以使用 sapply 函数来代替循环。该函数的工作方式如下:对向量的每个元素依次应用某个函数。

或者,您可以看看foreach软件包,它提供了一些快速循环。

以下是使用 sapply 的示例;根据您的具体需求,可以选用其中一种写法。另外,sapply 只是完成此操作的较快方法之一,不一定是最快的方法。

# Same setup as in the question, except that h is 1 here.
# x is drawn before y so the random number stream is consumed in a fixed order.
set.seed(1)
x <- rnorm(n = 5e3, mean = 1)
y <- rnorm(n = 5e3, mean = 2)

# Upper bound of the outer loop index used in the benchmark below.
h <- 1

# Probability levels and the matching empirical quantiles of each series.
q <- c(0.1, 0.3, 0.5, 0.7, 0.9)
qx <- quantile(x, probs = q)
qy <- quantile(y, probs = q)

# Expand each vector into the data frame of all ordered pairs
# (5 x 5 = 25 rows, columns Var1 and Var2).
qx <- expand.grid(qx, qx)
qy <- expand.grid(qy, qy)
q <- expand.grid(q, q)

# Share of lagged pairs (z[t], z[t + l]) that lie at or below a pair of
# thresholds, minus the product of one row of the grid `q`.
#
# Args:
#   z:  numeric vector.
#   l:  lag; a single number with 0 <= l < length(z).
#   qz: pair of thresholds accessed as qz[[1]] (bound for z[t]) and
#       qz[[2]] (bound for z[t + l]), e.g. one row of a two-column data frame.
#
# Returns:
#   A single number: sum(z[t] <= qz[[1]] and z[t + l] <= qz[[2]]) / (n - l)
#   minus prod(q[l, ]).
#
# `q` is not an argument; it is looked up in the enclosing environment.
# The row of `q` is selected by `l`. Callers pass their loop index as `l`, so
# this equals indexing by that loop index, but no longer requires a variable
# `i` to exist in the workspace (it does not when f is called from inside
# sapply/lapply, where the index is a function argument).
# NOTE(review): selecting the row of `q` by the lag looks unusual - confirm
# that the grid row was not intended instead.
f <- function(z, l, qz) {
  n <- length(z)
  # 1:(n - l) would count backwards for l >= n, so reject such lags up front.
  stopifnot(length(l) == 1L, l >= 0, l < n)
  m <- n - l
  lead_ok <- z[seq_len(m)] <= qz[[1]]
  lag_ok <- z[l + seq_len(m)] <= qz[[2]]
  1 / m * sum(lead_ok * lag_ok) - prod(q[l, ])
}

# load microbenchmark library for comparison of execution times
library(microbenchmark)

# Time three equivalent ways of accumulating the squared differences.
# Each expression is named so the benchmark table shows a short label instead
# of the whole deparsed code block. The accumulator is called `total` so that
# it does not mask base::sum, which the third variant calls.
microbenchmark(
  # the double for loop from the question
  for_loop = {
    total <- 0
    for (i in seq_len(h)) {
      for (j in seq_len(nrow(q))) {
        total <- total + (f(x, l = i, qx[j, ]) - f(y, l = i, qy[j, ]))^2
      }
    }
  },
  # nested sapply that updates the outer accumulator with <<-; besides leaving
  # the grand total in `total`, the call itself returns the running totals as
  # a matrix with one row per row of q and one column per value of i
  sapply_accumulate = {
    total <- 0
    sapply(seq_len(h), function(i) {
      sapply(seq_len(nrow(q)), function(j) {
        total <<- total + (f(x, l = i, qx[j, ]) - f(y, l = i, qy[j, ]))^2
      })
    })
  },
  # nested sapply that returns the individual squared differences, which are
  # then added up in a single call to sum()
  sapply_sum = {
    sum(sapply(seq_len(h), function(i) {
      sapply(seq_len(nrow(q)), function(j) {
        (f(x, l = i, qx[j, ]) - f(y, l = i, qy[j, ]))^2
      })
    }))
  },
  # run each expression 200 times to get the time comparison
  times = 200
)