来自auto.arima的预测并不与历史数据对齐

时间:2017-06-27 19:57:44

标签: r time-series

我正在对一个旅游地点的每日废水流量建模,该流量同时具有每周和每年两种季节性。我使用傅里叶项来刻画这两种季节性模式,并把降水量、废水接入(连接)数和假期作为回归变量,通过 xreg 参数将所有这些信息一并传入 auto.arima。拟合效果非常好:拟合值与观测值之间的相关系数为 0.986。我遇到的问题是预测值与历史值对不上:季节模式是正确的,但预测值整体向下偏移。任何帮助都将不胜感激。下面附上图表、代码和数据样本。提前致谢!如果我在 Stack Overflow 上的提问方式有不当之处,敬请见谅!

Graph of Fit

Graph of Forecast

# Packages used by the script (load order kept as in the original).
library(dplyr)
library(forecast)
library(tidyr)
library(ggplot2)

# Tab-delimited inputs: daily flow/connection records and hourly
# precipitation records.
MainData <- read.delim(file = "MainData.txt")
PrecipR <- read.delim(file = "PrecipR.txt")

# Collapse the hourly rain records to one row per day.
# Rows that repeat the same Year/Month/Day/Hour (first four columns) are
# dropped first so that no hour is counted twice.
PrecipR <- PrecipR[!duplicated(PrecipR[, 1:4]), ]

# Daily total, and number of hours in the day with any rain.
dsum <- aggregate(Precip ~ Year + Month + Day, data = PrecipR, FUN = sum)
dcount <- aggregate(
  Precip ~ Year + Month + Day,
  data = PrecipR,
  FUN = function(hourly) sum(hourly > 0)
)

# Daily rate = total rain / rainy hours. A day without rain is 0 / 0, i.e.
# NaN. This vector is not referenced again in the visible script.
prate <- dsum$Precip / dcount$Precip

# Year, Month, Day, Precip (daily total) and hcount (rainy hours).
# NOTE: aggregate() does not return the days in chronological order, so sort
# by Year/Month/Day before using these rows positionally.
precip <- cbind(dsum, hcount = dcount[, 4])

# Join the daily precipitation totals onto the flow records.
# all.x = TRUE keeps every flow record; days with no matching precipitation
# row get NA in Precip and hcount.
chronological <- with(MainData, order(Year, Month, Day))
dat <- merge(MainData[chronological, ], precip, all.x = TRUE)
# merge() sorts on its key columns, so put the rows back in date order.
dat <- dat[with(dat, order(Year, Month, Day)), ]

# Move to the beginning of the cycle: drop every row up to and including the
# day with the lowest influent flow of 2006, so the series starts on the row
# right after that minimum.
dat2006 <- dat[which(dat$Year == 2006), ]

# Look for the minimum only among the 2006 rows. Comparing the 2006 minimum
# against the whole series could also match days in other years, and
# which.min() returns exactly one row (the first) when the minimum is tied.
rows_2006 <- which(dat$Year == 2006)
start <- rows_2006[which.min(dat$Influent.Flow[rows_2006])]
stopifnot(length(start) == 1)
dat <- dat[-seq_len(start), ]

# attach() is kept so that statements referring to the columns by bare name
# (Influent.Flow, Precip, Connections) keep working.
attach(dat)

# Holiday dummy: 1 on July 4th and on one August date per year (2006-2016),
# plus the row before and the row after each of those days; 0 elsewhere.
# Day of August flagged in each year.
august_day <- c(
  "2006" = 17, "2007" = 16, "2008" = 21, "2009" = 20, "2010" = 19,
  "2011" = 18, "2012" = 16, "2013" = 22, "2014" = 21, "2015" = 20,
  "2016" = 18
)

is_july_4th <- dat$Month == 7 & dat$Day == 4
# Years without an entry in the table give NA here; which() drops them.
is_august_date <- dat$Month == 8 &
  dat$Day == unname(august_day[as.character(dat$Year)])
index <- which(is_july_4th | is_august_date)

holiday <- rep(0, nrow(dat))
holiday[index] <- 1

# Flag the neighbouring rows too, but only rows that exist: an unchecked
# index + 1 on the last row would silently lengthen the vector beyond
# nrow(dat). Neighbours are adjacent rows, which equal adjacent calendar days
# only if dat has no missing dates -- TODO confirm.
neighbours <- c(index - 1, index + 1)
neighbours <- neighbours[neighbours >= 1 & neighbours <= nrow(dat)]
holiday[neighbours] <- 1

# Regression with ARIMA errors: weekly (7) and yearly (365.25) seasonality
# enter through Fourier terms, so the ARIMA part itself is non-seasonal.
# Columns are taken from dat explicitly instead of through attach(), so that
# global variables named Precip or Connections cannot mask the training data.
y <- msts(dat$Influent.Flow, seasonal.periods = c(7, 365.25))
flow.msts <- y  # same series, kept under its original name
z <- fourier(y, K = c(2, 2))

# External regressors, one row per observation in y.
covariates.msts <- cbind(
  Precip = dat$Precip,
  Connections = dat$Connections,
  holiday = holiday
)

# Main model.
fit <- auto.arima(y, xreg = cbind(z, covariates.msts), seasonal = FALSE)

# In-sample agreement between observed and fitted values.
cor(dat$Influent.Flow, as.numeric(fit$fitted), use = "pairwise.complete.obs")

# Observed daily flow as points, with the model's fitted values drawn over
# them in red; the x axis is the position of each day in the series.
observed <- dat$Influent.Flow
day_index <- seq_along(observed)
plot(
  day_index, observed,
  ylim = c(0, 800000),
  main = "Daily Waste Water Flow, Observed and Modeled Values",
  ylab = "Flow (MGD)",
  xlab = "Daily Values, 2006 to February 2017"
)
lines(day_index, fit$fitted, col = "red")

# Forecast the `horizon` days that follow the last observed day
# (2/18/17 through 5/23/17 when the observations end on 2/17/17).
horizon <- 95

# Future precipitation = daily totals dated after the last row of dat, taken
# in chronological order. Two things matter here:
#  * conditions must be combined with `&`; a second positional argument to
#    which() is interpreted as arr.ind and is ignored as a filter;
#  * aggregate() returns its groups with the first grouping variable (Year)
#    varying fastest, so precip must be sorted by date before it is sliced
#    by position.
date_key <- function(year, month, day) year * 10000 + month * 100 + day
last_obs <- dat[nrow(dat), ]  # dat is sorted by Year, Month, Day
precip_key <- date_key(precip$Year, precip$Month, precip$Day)
future <- precip[
  which(precip_key > date_key(last_obs$Year, last_obs$Month, last_obs$Day)),
]
future <- future[order(future$Year, future$Month, future$Day), ]
# Assumes precip has one row for every future day (no gaps) -- TODO confirm.
p <- future$Precip
if (length(p) < horizon) {
  warning(
    "Only ", length(p), " days of future precipitation available; ",
    "the remaining rows of the forecast regressors are NA.",
    call. = FALSE
  )
}

# Regressors for the forecast period. Columns are named through cbind()
# arguments instead of `<-`, which would overwrite the global training
# vectors (notably `holiday`). Connections is held at its maximum observed
# value. holiday is 0 because neither July 4th nor any of the August dates
# falls inside the default window; revisit this if `horizon` is extended.
covariates.forecast <- cbind(
  Precip = p[seq_len(horizon)],
  Connections = rep(max(dat$Connections), horizon),
  holiday = rep(0, horizon)
)
colnames(covariates.forecast) <- colnames(covariates.msts)
zf <- fourier(y, K = c(2, 2), h = horizon)

fc <- forecast(fit, xreg = cbind(zf, covariates.forecast), h = horizon)
> head(MainData)
    Date Month Day Year Influent.Flow Connections
1 1/1/06     1   1 2006        141166      484672
2 1/2/06     1   2 2006         99883      484672
3 1/3/06     1   3 2006        108132      484672
4 1/4/06     1   4 2006         88356      484672
5 1/5/06     1   5 2006         89273      484672
6 1/6/06     1   6 2006         71614      484672
> head(PrecipR)
  Year Month Day Hour Precip
1 2006     1   1    0   0.03
3 2006     1   1    1   0.02
4 2006     1   1    2   0.01
5 2006     1   1    3   0.00
6 2006     1   1    4   0.00
8 2006     1   1    5   0.02

1 个答案:

答案 0 :(得分:0)

我自己解决了这个问题。模型过拟合了。我将傅里叶项的数量从 2 减少到 1,这只使拟合值与观测值之间的相关系数从 0.986 略微降到 0.983,而现在的预测结果是合理的。新结果的链接见下方。

New Forecast