我有5个货币对的1033每日返回点,我想在其上运行滚动窗口回归,但rollapply不适用于我使用lm()的已定义函数。这是我的数据:
> head(fxr)
USDZAR USDEUR USDGBP USDCHF USDCAD
2007-10-18 -0.005028709 -0.0064079963 -0.003878743 -0.0099537170 -0.0006153215
2007-10-19 -0.001544470 0.0014275520 -0.001842564 0.0023058211 -0.0111410271
2007-10-22 0.010878027 0.0086642116 0.010599365 0.0051899551 0.0173792230
2007-10-23 -0.022783987 -0.0075236355 -0.010804304 -0.0041668499 -0.0144788687
2007-10-24 -0.006561223 0.0008545792 0.001024275 -0.0004261666 0.0049525483
2007-10-25 -0.014788901 -0.0048523001 -0.001434280 -0.0050425302 -0.0046422944
> tail(fxr)
USDZAR USDEUR USDGBP USDCHF USDCAD
2012-02-10 0.018619309 0.007548205 0.005526184 0.006348533 0.0067151342
2012-02-13 -0.006449463 -0.001055966 -0.002206810 -0.001638002 -0.0016995755
2012-02-14 0.006320364 0.006843933 0.006605875 0.005992935 0.0007001751
2012-02-15 -0.001666872 0.004319096 -0.001568874 0.003686840 -0.0015009759
2012-02-16 0.006419616 -0.003401364 -0.005194817 -0.002709588 -0.0019044761
2012-02-17 -0.004339687 -0.003675992 -0.003319899 -0.003043481 0.0000000000
我可以轻松地在其上运行一个lm来为整个数据集建模USDZAR与其他对模型:
> lm(USDZAR ~ ., data = fxr)$coefficients
(Intercept) USDEUR USDGBP USDCHF USDCAD
-1.309268e-05 5.575627e-01 1.664283e-01 -1.657206e-01 6.350490e-01
但是我想运行一个滚动的62天窗口来获得这些系数随时间的变化,所以我创建了一个函数dolm来实现这个:
> dolm
function(x) {
return(lm(USDZAR ~ ., data = x)$coefficients)
}
然而,当我对此进行rollapply时,我得到以下内容:
> rollapply(fxr, 62, FUN = dolm)
Error in terms.formula(formula, data = data) :
'.' in formula and no 'data' argument
即使dolm(fxr)本身也能正常工作:
> dolm(fxr)
(Intercept) USDEUR USDGBP USDCHF USDCAD
-1.309268e-05 5.575627e-01 1.664283e-01 -1.657206e-01 6.350490e-01
这里发生了什么?如果dolm是一个更简单的函数,例如mean:
,它似乎工作正常> dolm <- edit(dolm)
> dolm
function(x) {
return(mean(x))
}
> rollapply(fxr, 62, FUN = dolm)
USDZAR USDEUR USDGBP USDCHF USDCAD
2007-11-29 -1.766901e-04 -6.899297e-04 6.252596e-04 -1.155952e-03 7.021468e-04
2007-11-30 -1.266130e-04 -6.512204e-04 7.067767e-04 -1.098413e-03 7.247315e-04
2007-12-03 8.949942e-05 -6.406932e-04 6.637066e-04 -1.154806e-03 8.727564e-04
2007-12-04 2.042046e-04 -5.758493e-04 5.497422e-04 -1.116308e-03 7.124593e-04
2007-12-05 7.343586e-04 -4.899982e-04 6.161819e-04 -1.057904e-03 9.915495e-04
任何帮助非常感谢。基本上我想要的是在62天的滚动窗口中获得USDZAR~USDEUR + USDGBP + USDCHF + USDCAD回归的权重。
答案 0 :(得分:9)
这里有几个问题:
rollapply
传递矩阵,但lm
需要data.frame
。rollapply
将函数分别应用于每个列,除非我们
指定by.column=FALSE
。 rollapplyr
:1)结合以上内容我们有:
dolm <- function(x) coef(lm(USDZAR ~ ., data = as.data.frame(x))))
rollapplyr(fxr, 62, dolm, by.column = FALSE)
2)上面lm
中dolm
的替代方法是使用lm.fit
,它直接与矩阵一起使用并且速度更快:
dolm <- function(x) coef(lm.fit(cbind(Intercept = 1, x[,-1]), x[,1]))
答案 1 :(得分:2)
G. Grothendieck's answer是正确的,但您可以使用rollRegres
包更快地执行此操作,如以下示例所示(roll_regres.fit
函数快〜118倍)
# simulate data
set.seed(101)
n <- 1000
wdth = 100
X <- matrix(rnorm(10 * n), n, 10)
y <- drop(X %*% runif(10)) + rnorm(n)
Z <- cbind(y, X)
# assign other function
dolm <- function(x)
coef(lm.fit(x[, -1], x[, 1]))
# show that they yield the same
library(zoo)
library(rollRegres)
all.equal(
rollapply(Z, wdth, FUN = dolm,
by.column = FALSE, align = "right", fill = NA_real_),
roll_regres.fit(X, y, wdth)$coefs,
check.attributes = FALSE)
#R [1] TRUE
# benchmark
library(compiler)
dolm <- cmpfun(dolm)
microbenchmark::microbenchmark(
newnew = roll_regres.fit(X, y, wdth),
prev = rollapply(Z, wdth, FUN = dolm,
by.column = FALSE, align = "right", fill = NA_real_),
times = 10)
#R Unit: microseconds
#R expr min lq mean median uq max neval
#R newnew 884.938 950.914 1026.134 1025.581 1057.581 1242.075 10
#R prev 111057.822 111903.649 118867.761 116857.726 122087.160 141362.229 10
如果您想使用R公式,也可以使用包中的roll_regres
功能。
第三个选项是更新QR分解中的R矩阵,如下面的代码所示。您可以通过在C ++中执行此操作来加快速度,但是您需要来自LINPACK的dchud
和dchdd
子例程(或其他更新R的函数)
library(SamplerCompare) # for LINPACK `chdd` and `chud`
roll_coef <- function(X, y, width){
n <- nrow(X)
p <- ncol(X)
out <- matrix(NA_real_, n, p)
is_first <- TRUE
i <- width
while(i <= n){
if(is_first){
is_first <- FALSE
qr. <- qr(X[1:width, ])
R <- qr.R(qr.)
# Use X^T for the rest
X <- t(X)
XtY <- drop(tcrossprod(y[1:width], X[, 1:width]))
} else {
x_new <- X[, i]
x_old <- X[, i - width]
# update R
R <- .Fortran(
"dchud", R, p, p, x_new, 0., 0L, 0L,
0., 0., numeric(p), numeric(p),
PACKAGE = "SamplerCompare")[[1]]
# downdate R
R <- .Fortran(
"dchdd", R, p, p, x_old, 0., 0L, 0L,
0., 0., numeric(p), numeric(p), integer(1),
PACKAGE = "SamplerCompare")[[1]]
# update XtY
XtY <- XtY + y[i] * x_new - y[i - width] * x_old
}
coef. <- .Internal(backsolve(R, XtY, p, TRUE, TRUE))
out[i, ] <- .Internal(backsolve(R, coef., p, TRUE, FALSE))
i <- i + 1
}
out
}
# simulate data
set.seed(101)
n <- 1000
wdth = 100
X <- matrix(rnorm(10 * n), n, 10)
y <- drop(X %*% runif(10)) + rnorm(n)
Z <- cbind(y, X)
# assign other function
dolm <- function(x)
coef(lm.fit(x[, -1], x[, 1]))
# show that they yield the same
library(zoo)
all.equal(
rollapply(Z, wdth, FUN = dolm,
by.column = FALSE, align = "right", fill = NA_real_),
roll_coef(X, y, wdth),
check.attributes = FALSE)
#R> [1] TRUE
# benchmark
library(compiler)
roll_coef <- cmpfun(roll_coef)
dolm <- cmpfun(dolm)
microbenchmark::microbenchmark(
new = roll_coef(X, y, wdth),
prev = rollapply(Z, wdth, FUN = dolm,
by.column = FALSE, align = "right", fill = NA_real_),
times = 10)
#R> Unit: milliseconds
#R> expr min lq mean median uq max neval cld
#R> new 8.631319 9.010579 9.808525 9.659665 9.973741 11.87083 10 a
#R> prev 118.257128 121.734860 124.489826 122.882318 127.195410 135.21280 10 b
上述解决方案要求您首先形成model.matrix
和model.response
,但在调用model.frame
之前,这只是三次调用(roll_coef
之外的一次调用)。