按组滚动时间序列回归

时间:2017-06-01 19:06:24

标签: r regression

我正在研究R中的以下数据:

# Getting stock data
library(quantmod)
# Symbols to download; every entry carries the ".OL" suffix.
tickers <- c(
  "ASC.OL", "AFG.OL", "AKA.OL", "AKER.OL", "AKERBP.OL", "AKSO.OL",
  "AKVA.OL", "AMSC.OL", "APP.OL", "AQUA.OL", "ARCHER.OL", "ARCUS.OL",
  "AFK.OL", "ASETEK.OL", "ATEA.OL", "ATLA-NOK.OL", "AURG.OL", "AUSS.OL",
  "AVANCE.OL", "AVM.OL", "AXA.OL", "B2H.OL", "BAKKA.OL", "BEL.OL",
  "BERGEN.OL", "BGBIO.OL", "BIOTEC.OL", "BON.OL", "BOR.OL", "BRG.OL",
  "BOUVET.OL", "BWLPG.OL", "BWO.OL", "BMA.OL", "COV.OL", "CXENSE.OL",
  "DAT.OL", "DESSC.OL", "DNB.OL", "DNO.OL", "DOF.OL", "EIOF.OL",
  "EKO.OL", "EMGS.OL", "EMAS.OL", "ENTRA.OL", "EPR.OL", "FAR.OL",
  "FOE.OL", "FRO.OL", "FUNCOM.OL", "GIG.OL", "RISH.OL", "GJF.OL",
  "GOGL.OL", "GOD.OL", "GSF.OL", "GYL.OL", "HNA.OL", "HNB.OL",
  "HAVI.OL", "HYARD.OL", "HELG.OL", "HEX.OL", "HIDDN.OL", "HLNG.OL",
  "HSPG.OL", "IMSK.OL", "IDEX.OL", "INC.OL", "ISSG.OL", "INSR.OL",
  "IOX.OL", "ITX.OL", "ITE.OL", "JIN.OL", "JAEREN.OL", "KID.OL",
  "KIT.OL", "KOA.OL", "KOG.OL", "KVAER.OL", "LSG.OL", "LINK.OL",
  "MHG.OL", "MEDI.OL", "MELG.OL", "MULTI.OL", "NAPA.OL", "NAVA.OL",
  "NEL.OL", "NEXT.OL", "NGT.OL", "NANO.OL", "NOD.OL", "NHY.OL",
  "NSG.OL", "NRS.OL", "NAS.OL", "NOR.OL", "NOFI.OL", "NPRO.OL",
  "NRC.OL", "NTS.OL", "OCY.OL", "OTS.OL", "ODL.OL", "ODF.OL",
  "ODFB.OL", "OLT.OL", "OPERA.OL", "ORK.OL", "PEN.OL", "PARB.OL",
  "PGS.OL", "PDR.OL", "PHO.OL", "PLCS.OL", "POL.OL", "PRS.OL",
  "PROTCT.OL", "QFR.OL", "QEC.OL", "RAKP.OL", "REACH.OL", "REC.OL",
  "RENO.OL", "SDSD.OL", "SAFE.OL", "SALM.OL", "SADG.OL", "SAS-NOK.OL",
  "SSO.OL", "SCHA.OL", "SCHB.OL", "SBX.OL", "SDRL.OL", "SBO.OL",
  "SEVDR.OL", "SEVAN.OL", "SIOFF.OL", "SKBN.OL", "SKI.OL", "SKUE.OL",
  "SOLON.OL", "SOFF.OL", "SOFFB.OL", "SOLV.OL", "SONG.OL", "SBVG.OL",
  "NONG.OL", "RING.OL", "MING.OL", "SRBANK.OL", "SOAG.OL", "MORG.OL",
  "SOR.OL", "SVEG.OL", "SPOG.OL", "SPU.OL", "STL.OL", "SNI.OL",
  "STB.OL", "STORM.OL", "STRONG.OL", "SUBC.OL", "TIL.OL", "TRVX.OL",
  "TEAM.OL", "TECH.OL", "TEL.OL", "TGS.OL", "SSC.OL", "THIN.OL",
  "TOM.OL", "TOTG.OL", "TRE.OL", "TTS.OL", "VEI.OL", "VVL.OL",
  "WWL.OL", "WEIFA.OL", "WRL.OL", "WWI.OL", "WWIB.OL", "WILS.OL",
  "XXL.OL", "YAR.OL", "ZAL.OL"
)
# Download every ticker into a dedicated environment. A failed download is
# reported with a warning and recorded as NA in `out` instead of aborting the
# whole run (best-effort download).
dataEnv <- new.env()
out <- sapply(tickers, function(s) {
  tryCatch(
    getSymbols(s, env = dataEnv),
    error = function(e) {
      warning("Failed to download ", s, ": ", conditionMessage(e),
              call. = FALSE)
      NA
    }
  )
})

# Collect the adjusted series of every downloaded symbol and merge them into
# one object with one column per symbol.
plist <- eapply(dataEnv, Ad)
if (length(plist) == 0L) {
  stop("No ticker data was downloaded; nothing to merge.", call. = FALSE)
}
pframe <- do.call(merge, plist)

# Strip the ".Adjusted" suffix literally, and a trailing ".X" only when it is
# really a suffix. The previous unanchored regex ".X" matched any character
# followed by "X" and therefore corrupted ticker names such as "AXA.OL",
# "CXENSE.OL", "SBX.OL" and "XXL.OL".
names(pframe) <- gsub(".Adjusted", "", names(pframe), fixed = TRUE)
names(pframe) <- sub("\\.X$", "", names(pframe))

# Log returns: first difference of the log series.
pframe <- diff(log(pframe), lag = 1)

# Reshape to long format: one row per (Ticker, Date) pair.
pframe <- t(pframe)
library(reshape2)
# Call reshape2::melt explicitly: once data.table is attached its own melt()
# generic masks reshape2's, and its redirection for matrices is deprecated.
pframe <- reshape2::melt(pframe)
library(data.table)
setnames(pframe,
         old = c("Var1", "Var2", "value"),
         new = c("Ticker", "Date", "Returns"))

# Getting index data
# Same pipeline as for the stocks, applied to the single index symbol: the
# result is a long data frame with columns Index, Date and Index_Returns.
index <- c("^GSPC")
dataEnv1 <- new.env()
out1 <- sapply(index, function(s) {
  tryCatch(
    getSymbols(s, env = dataEnv1),
    error = function(e) {
      warning("Failed to download ", s, ": ", conditionMessage(e),
              call. = FALSE)
      NA
    }
  )
})
index <- eapply(dataEnv1, Ad)
if (length(index) == 0L) {
  stop("No index data was downloaded; nothing to merge.", call. = FALSE)
}
index <- do.call(merge, index)

# Remove the suffixes with a literal / anchored match rather than an
# unescaped regex dot, which matches any character.
names(index) <- gsub(".Adjusted", "", names(index), fixed = TRUE)
names(index) <- sub("\\.X$", "", names(index))

# Log returns: first difference of the log series.
index <- diff(log(index), lag = 1)

# Long format. data.table is already attached at this point and masks
# reshape2's melt(); its redirection for matrices is deprecated, so the
# reshape2 function is called explicitly.
index <- t(index)
index <- reshape2::melt(index)
library(data.table)
setnames(index,
         old = c("Var1", "Var2", "value"),
         new = c("Index", "Date", "Index_Returns"))

# Getting curr data
# Same pipeline as for the stocks, applied to the single currency symbol: the
# result is a long data frame with columns NOK, Date and NOK_Returns.
curr <- c("NOK=X")
dataEnv1 <- new.env()
out1 <- sapply(curr, function(s) {
  tryCatch(
    getSymbols(s, env = dataEnv1),
    error = function(e) {
      warning("Failed to download ", s, ": ", conditionMessage(e),
              call. = FALSE)
      NA
    }
  )
})
curr <- eapply(dataEnv1, Ad)
if (length(curr) == 0L) {
  stop("No currency data was downloaded; nothing to merge.", call. = FALSE)
}
curr <- do.call(merge, curr)

# Remove the ".Adjusted" suffix literally, then the symbol's trailing "=X"
# (or ".X", presumably the form it takes if the name was made syntactic).
# The pattern is anchored so it can only ever strip a suffix.
names(curr) <- gsub(".Adjusted", "", names(curr), fixed = TRUE)
names(curr) <- sub("[.=]X$", "", names(curr))

# Log returns: first difference of the log series.
curr <- diff(log(curr), lag = 1)

# Long format. data.table is already attached at this point and masks
# reshape2's melt(); its redirection for matrices is deprecated, so the
# reshape2 function is called explicitly.
curr <- t(curr)
curr <- reshape2::melt(curr)
library(data.table)
setnames(curr,
         old = c("Var1", "Var2", "value"),
         new = c("NOK", "Date", "NOK_Returns"))

# Join index and currency returns on Date, then discard the two label
# columns so only Date plus the two return series remain.
NOK_index <- merge(index, curr, by = "Date")
kept_columns <- setdiff(names(NOK_index), c("Index", "NOK"))
NOK_index <- NOK_index[, kept_columns, drop = FALSE]

# Attach the index/currency returns to every stock row and drop rows that
# contain any missing value.
Final <- na.omit(merge(pframe, NOK_index, by = "Date"))

我试图在名为 "Final" 的数据集中按组运行滚动时间序列回归。分组变量(group-id)是名为 "Ticker" 的变量。我希望滚动窗口至少包含 120 个观测值,最多 240 个。我想将 "Returns" 对 "Market_ret" 做回归。目前我正在使用下面的回归代码,并试图把它改成滚动回归。有什么建议吗?

library(dplyr)
library(magrittr)
library(broom)
## Fit models
# Fit one OLS regression on the rows of a single ticker and return its
# coefficient table in tidy form.
fit_by_ticker <- function(ticker_rows) {
  ols <- lm(Returns ~ Index_Returns + NOK_Returns, data = ticker_rows)
  tidy(ols)
}
fitted_model <- Final %>%
  group_by(Ticker) %>%
  do(fit_by_ticker(.))

我尝试了下面的代码,但它运行起来非常耗时(进度提示显示:已完成 1%,预计还需约 5 小时;我在它运行结束之前就把它取消了)。

library(dplyr)
library(magrittr)   
library(broom)
## Fit models
# Rolling-regression attempt: for each Ticker group, rollapply() slides a
# right-aligned window of 262 rows over the group's rows (by.column = FALSE,
# so each window is passed to FUN as a whole), fits
# Returns ~ Index_Returns + NOK_Returns with lm() on that window and returns
# the fitted coefficients; the result is then passed to tidy().
# NOTE(review): the width of 262 lies outside the 120-240 observation window
#   described in the question text - confirm which is intended.
# NOTE(review): rollapply() is not provided by any package loaded in this
#   snippet; presumably zoo is attached via quantmod earlier - confirm.
# NOTE(review): the whole group (including the Ticker and Date columns) is
#   handed to rollapply(); presumably it is coerced to a single-type matrix,
#   which would turn the numeric columns into text - verify, as this may
#   explain the very long run time.
# NOTE(review): `na.rm` is not a documented lm() argument (lm() handles
#   missing values through `na.action`) - confirm it has any effect; `T` is
#   also reassignable, unlike `TRUE`.
# The local name `t` shadows base::t() inside the anonymous function.
fitted_model <- Final %>% group_by(Ticker) %>% do(tidy(rollapply(. , width=262, FUN = function(Z) { t = lm(formula=Returns ~ Index_Returns + NOK_Returns, data = as.data.frame(Z), na.rm=T); return(t$coef) }, by.column=FALSE, align="right")))

1 个答案:

答案 0 :(得分:0)

如果要对组运行OLS回归,可以尝试使用dplyr包。使用do()函数将运行模型并将它们存储在名为model的变量中。

library(dplyr)
library(magrittr)
## Fit models
# One OLS fit per ticker; do() keeps each fitted lm object in a list-column
# named `model`.
# `Final` as built in the question has the columns Date, Ticker, Returns,
# Index_Returns and NOK_Returns - there is no `Market_ret`, so referring to it
# fails at run time. The market-index return column `Index_Returns` is used
# as the regressor instead.
fitted_model <- Final %>%
    group_by(Ticker) %>%
    do(model = lm(Returns ~ Index_Returns, data = .))

要提取回归系数,可以使用 R 中的 broom 包:运行 tidy(model) 应该就可以了。