我是 R 的新手,想知道下面这段代码能否用更高效的方式实现。时间序列 x 和 y 的长度约为 5000,并且 h != nrow(q)(即 h 与 nrow(q) 不相等)。
# Fix the RNG state so the simulated series are reproducible.
set.seed(1)

# Largest lag visited by the double loop further down.
h <- 21

# Two simulated series of length 5000 (unit sd, means 1 and 2).
x <- rnorm(5e3, mean = 1)
y <- rnorm(5e3, mean = 2)

# Probability levels. `q` is only turned into the grid of level pairs at the
# very end, after the empirical quantiles have been taken at these levels.
q <- c(0.1, 0.3, 0.5, 0.7, 0.9)

# Empirical quantiles of each series, then every ordered pair of them
# (one pair per row, columns Var1 and Var2).
qx <- quantile(x, probs = q)
qy <- quantile(y, probs = q)
qx <- expand.grid(qx, qx)
qy <- expand.grid(qy, qy)

# Matching grid of probability-level pairs, row-aligned with qx and qy.
q <- expand.grid(q, q)
# Empirical joint frequency of a series and its lagged copy both lying at or
# below a pair of thresholds, minus a reference probability.
#
# Arguments:
#   z   numeric vector (the series).
#   l   lag; a single number with 0 <= l < length(z).
#   qz  pair of thresholds: element 1 is compared with z[t], element 2 with
#       z[t + l]. Anything indexable with `[[` works (vector, list, or a
#       one-row data frame such as qx[j, ]).
#   p   value subtracted from the frequency. The default keeps the previous
#       behaviour of reading `q` and `i` from outside the function; pass `p`
#       explicitly to avoid that hidden dependency on the caller's variables.
#
# Returns a single number:
#   1 / (n - l) * #{t : z[t] <= qz[[1]] and z[t + l] <= qz[[2]]} - p
f <- function(z, l, qz, p = prod(q[i, ])) {
  n <- length(z)
  # 1:(n - l) would silently count backwards for l >= n, so reject that case.
  stopifnot(length(l) == 1L, l >= 0, l < n)
  m <- n - l
  below_now <- z[seq_len(m)] <= qz[[1]]
  below_later <- z[seq.int(l + 1, n)] <= qz[[2]]
  1 / m * sum(below_now * below_later) - p
}
# Accumulate, over lags 1..h and every row of the quantile grids, the squared
# difference between f() evaluated on x and on y.
#
# The outer loop variable has to be the top-level name `i`: f() reads `i`
# (and `q`) from outside its own arguments, so it must exist here.
# The accumulator is called `sum`; that masks base::sum as a variable only,
# calls of the form sum(...) still resolve to the function.
sum <- 0
n_pairs <- nrow(q)  # loop-invariant, so look it up once
for (i in seq_len(h)) {    # seq_len(): no iterations when h is 0
  for (j in seq_len(n_pairs)) {
    gap <- f(x, l = i, qx[j, ]) - f(y, l = i, qy[j, ])
    sum <- sum + gap^2
  }
}
sum
# 0.0008698279
非常感谢您!
答案 0 :(得分:1)
在某些情况下,可以使用 sapply 函数来代替循环。
该函数的工作方式如下:对向量的每个元素依次调用给定的函数,并返回各次调用的结果。
或者,您可以看看 foreach 软件包,它提供了另一种(可并行的)循环写法。
以下是使用 sapply 的示例:根据您的具体需求,您可以选用下面几种写法中的一种。另外,sapply 只是完成此任务的较快方法之一,不一定是最快的方法。
# setup from the question (only the largest lag differs: h is 1 here)

# Fix the RNG state so the simulated series are reproducible.
set.seed(1)

# Largest lag visited by the benchmarked code below.
h <- 1

# Two simulated series of length 5000 (unit sd, means 1 and 2).
x <- rnorm(5e3, mean = 1)
y <- rnorm(5e3, mean = 2)

# Probability levels. `q` is only turned into the grid of level pairs at the
# very end, after the empirical quantiles have been taken at these levels.
q <- c(0.1, 0.3, 0.5, 0.7, 0.9)

# Empirical quantiles of each series, then every ordered pair of them
# (one pair per row, columns Var1 and Var2).
qx <- quantile(x, probs = q)
qy <- quantile(y, probs = q)
qx <- expand.grid(qx, qx)
qy <- expand.grid(qy, qy)

# Matching grid of probability-level pairs, row-aligned with qx and qy.
q <- expand.grid(q, q)
# Empirical joint frequency of a series and its lagged copy both lying at or
# below a pair of thresholds, minus a reference probability.
#
# Arguments:
#   z   numeric vector (the series).
#   l   lag; a single number with 0 <= l < length(z).
#   qz  pair of thresholds: element 1 is compared with z[t], element 2 with
#       z[t + l]. Anything indexable with `[[` works (vector, list, or a
#       one-row data frame such as qx[j, ]).
#   p   value subtracted from the frequency. The default keeps the previous
#       behaviour of reading `q` and `i` from outside the function; pass `p`
#       explicitly to avoid that hidden dependency on the caller's variables.
#
# Returns a single number:
#   1 / (n - l) * #{t : z[t] <= qz[[1]] and z[t + l] <= qz[[2]]} - p
f <- function(z, l, qz, p = prod(q[i, ])) {
  n <- length(z)
  # 1:(n - l) would silently count backwards for l >= n, so reject that case.
  stopifnot(length(l) == 1L, l >= 0, l < n)
  m <- n - l
  below_now <- z[seq_len(m)] <= qz[[1]]
  below_later <- z[seq.int(l + 1, n)] <= qz[[2]]
  1 / m * sum(below_now * below_later) - p
}
# load microbenchmark library for comparison of execution times
library(microbenchmark)
# Time three ways of computing the same double sum over lags 1..h and the
# rows of the quantile grids. Each braced expression is one candidate and is
# passed to microbenchmark() as a separate argument.
#
# NOTE(review): f() as defined in this file reads `i` from outside its own
# arguments. In the two sapply variants `i` is a parameter of the anonymous
# function, which f() does not see, so f() uses whatever `i` exists in the
# calling environment (e.g. the one left behind by the for-loop variant).
# Confirm such an `i` is defined before these expressions are evaluated.
microbenchmark({
# the version from question with for loop
# (`sum` is an ordinary numeric accumulator here, not the base function)
sum = 0
for (i in 1:h) {
for (j in 1:nrow(q)) {
sum = sum + (f(x, l = i, qx[j,]) - f(y, l = i, qy[j,]))^2
}
}
},
{
# using sapply and storing to object. this will give you h*j matrix as well as the sum
# The inner function updates `sum` through `<<-`, i.e. it modifies the `sum`
# found in an enclosing environment rather than a local copy; the value each
# call returns is that of the assignment (the running total so far).
sum = 0
sapply(1:h, function(i) sapply(1:nrow(q), function(j) {sum <<- sum + (f(x, l = i, qx[j,]) - f(y, l = i, qy[j,]))^2}))
},
{
# use sapply and sum the output
# No accumulator: the nested sapply() returns the squared differences and
# they are added up in one call to sum().
sum(sapply(1:h, function(i) sapply(1:nrow(q), function(j) {(f(x, l = i, qx[j,]) - f(y, l = i, qy[j,]))^2})))},
# run each code 200 times to get the time comparison
times = 200
)