使用midasr进行预测时出错(包含可重现的示例)

时间:2014-04-19 03:56:52

标签: r forecasting

代码是自包含的,但下面链接的数据集除外。

代码中使用的

.csv文件,请先下载: https://drive.google.com/?authuser=0#folders/0B1ciW4R5hjUCRFpjQlJKZGFqcVU

library(midasr)
library(zoo)


 yvellaregdata <- read.table("~/Desktop/attempt1/ymonthlyjackson.csv", quote="\"")
 yvellareg <- ts(yvellaregdata, start=c(2008,7), frequency=12)

 xvellareginit <- read.table("~/Desktop/attempt1/xdailyjackson.csv", quote="\"")
 xvellaregzoo <- zoo(xvellareg)
 xvellareg <- as.numeric(xvellaregzoo) #i had to convert to numeric for it to work

#yvellareg is the monthly y variable
#xvellareg is the daily x variable
 betareg <- midas_r(yvellareg ~ mls(yvellareg, 1, 1) + mls(xvellareg, 3:25, 30), start=NULL)
 summary(betareg)


#Defining data for forecasting
 xdailyfulldataread <- read.table("~/Desktop/attempt1/xdailyfulldatajackson.csv", quote="\"")
 xdailyfulldata <- zoo(xdailyfulldataread)
 xdailyfulldata <- as.numeric(xdailyfulldata)

 ymonthlyfulldataread <- read.table("~/Desktop/attempt1/ymonthlyfulldatajackson.csv", quote="\"")
 ymonthlyfulldata <- ts(ymonthlyfulldataread,start=c(2008,7), frequency=12)


fulldata <- list(xx=xdailyfulldata,
                   yy=ymonthlyfulldata)
insample <- 1:length(yvellareg)
outsample <- (1:length(fulldata$yy))[-insample]

#errorhere
avgf<-average_forecast(list(betareg),
                       data=fulldata,
                       insample=insample,
                       outsample=outsample)
sqrt(avgf$accuracy$individual$MSE.out.of.sample)

1 个答案:

答案 0 :(得分:4)

由于您已经在R外部准备了样本内和完整样本的数据,因此无需将其转换为时间序列对象。

以下是代码的清理版本,它假定数据文件位于R工作目录中:

library(midasr)

yvellareg <- scan("ymonthlyjackson.csv")
xvellareg <- scan("xdailyjackson.csv")

#yvellareg is the monthly y variable
#xvellareg is the daily x variable
 betareg <- midas_r(yvellareg ~ mls(yvellareg, 1, 1) + mls(xvellareg, 3:25, 30), start=NULL)
 summary(betareg)


#Defining data for forecasting
xdailyfulldata <- scan("xdailyfulldatajackson.csv")
ymonthlyfulldata <- scan("ymonthlyfulldatajackson.csv")


fulldata <- list(xvellareg=xdailyfulldata,
                   yvellareg=ymonthlyfulldata)
insample <- 1:length(yvellareg)
outsample <- (1:length(fulldata$yvellareg))[-insample]

#errorhere
avgf<-average_forecast(list(betareg),
                       data=fulldata,
                       insample=insample,
                       outsample=outsample)
sqrt(avgf$accuracy$individual$MSE.out.of.sample)

但是这仍然会引发错误,因为您的数据不符合要求。包midasr期望每个低频周期具有相同数量的高频周期。在你的情况下,这是30.但我们有

> length(xdailyfulldata)
[1] 1230
> length(ymonthlyfulldata)
[1] 42
> 1230/42
[1] 29.28571

42*30=1260以来,您似乎每月的观察次数多于每日观察次数。删除一个月度观察使代码运行没有错误:

fulldata <- list(xvellareg=xdailyfulldata,
                   yvellareg=ymonthlyfulldata[-42])
insample <- 1:length(yvellareg)
outsample <- (1:length(fulldata$yvellareg))[-insample]

#errorhere
avgf<-average_forecast(list(betareg),
                       data=fulldata,
                       insample=insample,
                       outsample=outsample)
sqrt(avgf$accuracy$individual$MSE.out.of.sample)
[1] 1.118709