我正在为一个旅游地点的每日废水流量建模,该流量同时具有每周和每年两种季节性。我用傅里叶项来刻画这两种季节性模式,并把降水量、废水管网连接数和假期作为回归变量,通过 xreg 参数将这些信息一并传入 auto.arima。拟合效果非常好:拟合值与观测值之间的相关系数为 0.986。我遇到的问题是预测值与历史值不一致:模式是正确的,但整体向下偏移。任何帮助都将不胜感激。下面附上图表、代码和数据样本。提前致谢!如果我在 Stack Overflow 上的提问方式有不妥之处,还请见谅!
# Load the two tab-delimited input tables from the working directory.
# PrecipR: hourly precipitation records (Year, Month, Day, Hour, Precip).
# MainData: daily plant records (Date, Month, Day, Year, Influent.Flow,
# Connections).
PrecipR <- read.delim(file = "PrecipR.txt")
MainData <- read.delim(file = "MainData.txt")
library("dplyr")
library("forecast")
library("tidyr")
library('ggplot2')
#turn rain data into a daily rate (sum of rain divided by number of hours with rain)
# Produces:
#   dsum   - daily precipitation total, one row per Year/Month/Day
#   dcount - number of hourly records per day with Precip > 0
#   prate  - daily total divided by the number of wet hours
#   precip - dsum plus an `hcount` column (wet-hour count)
# All four are row-aligned and in chronological order.

# Keep only the first record for each Year/Month/Day/Hour combination
# (columns 1-4), so repeated hourly readings are not counted twice.
PrecipR <- PrecipR[!duplicated(PrecipR[, 1:4]), ]

dsum <- aggregate(Precip ~ Year + Month + Day, data = PrecipR, FUN = sum)
dcount <- aggregate(
  Precip ~ Year + Month + Day,
  data = PrecipR,
  FUN = function(Precip) sum(Precip > 0)
)

# Both aggregates come from the same formula and data, so their keys must
# line up row for row before they are combined positionally.
stopifnot(
  nrow(dsum) == nrow(dcount),
  all(dsum$Year == dcount$Year),
  all(dsum$Month == dcount$Month),
  all(dsum$Day == dcount$Day)
)

# aggregate() returns groups with the FIRST grouping variable varying fastest
# (i.e. ordered by Day, then Month, then Year). Re-order chronologically so
# that positional slices of `precip` are consecutive days.
chrono <- order(dsum$Year, dsum$Month, dsum$Day)
dsum <- dsum[chrono, ]
dcount <- dcount[chrono, ]
rownames(dsum) <- NULL
rownames(dcount) <- NULL

# A day with no wet hours would give 0 / 0 = NaN; its rate is defined as 0.
prate <- ifelse(dcount$Precip > 0, dsum$Precip / dcount$Precip, 0)

precip <- cbind(dsum, hcount = dcount$Precip)
# Join the daily flow records to the daily precipitation table.
# merge() matches on the columns the two tables have in common; all.x = TRUE
# keeps every flow record, leaving the precipitation columns NA on days that
# have no precipitation row. The result is put back into chronological order
# because merge() re-sorts on its key columns.
dat <- merge(
  MainData[with(MainData, order(Year, Month, Day)), ],
  precip,
  all.x = TRUE
)
dat <- dat[with(dat, order(Year, Month, Day)), ]
#move to beginning of cycle and attach data
# The lowest-flow day of 2006 is taken as the start of the cycle. Every row
# up to and including that day is dropped.
rows_2006 <- which(dat$Year == 2006)
dat2006 <- dat[rows_2006, ]

# Search for the minimum only among the 2006 rows. which.min() skips NA and
# returns a single index (the first one on ties), so `start` is always a
# scalar; matching the minimum value against every year could return zero or
# several positions.
start <- rows_2006[which.min(dat2006$Influent.Flow)]
stopifnot(length(start) == 1)

dat <- dat[-seq_len(start), ]

# NOTE(review): attach() is discouraged, but it is kept here because other
# statements in this script may refer to the columns of `dat` by bare name.
attach(dat)
#add holiday dummy
# `holiday` is a 0/1 indicator with one entry per row of `dat`. It is 1 on
# 4 July of every year, on one listed August date per year (2006-2016), and on
# the rows immediately before and after each of those rows.

# Day of the month of the August event, keyed by year.
aug_event_day <- c(
  "2006" = 17, "2007" = 16, "2008" = 21, "2009" = 20, "2010" = 19,
  "2011" = 18, "2012" = 16, "2013" = 22, "2014" = 21, "2015" = 20,
  "2016" = 18
)

holiday <- rep(0, nrow(dat))

is_july_4 <- dat$Month == 7 & dat$Day == 4
# Years missing from the table look up as NA; which() below drops those.
is_aug_event <- dat$Month == 8 &
  dat$Day == unname(aug_event_day[as.character(dat$Year)])

index <- which(is_july_4 | is_aug_event)
holiday[index] <- 1

# Flag the neighbouring rows as well. Positions outside 1..nrow(dat) are
# discarded so that an event on the last row cannot lengthen `holiday`.
# NOTE(review): adjacent rows are assumed to be adjacent calendar days -
# confirm that `dat` has no missing dates.
neighbours <- c(index - 1, index + 1)
neighbours <- neighbours[neighbours >= 1 & neighbours <= length(holiday)]
holiday[neighbours] <- 1
#model auto.arima with weekly and yearly seasons using msts and Fourier terms
# Columns are read from `dat` explicitly rather than through the attach()
# search path, so that global variables named Precip or Connections cannot
# silently replace the training regressors.
stopifnot(length(holiday) == nrow(dat))

# Daily series with two seasonal periods: 7 (weekly) and 365.25 (yearly).
y <- msts(dat$Influent.Flow, seasonal.periods = c(7, 365.25))
flow.msts <- y

# Two Fourier harmonics for each of the two seasonal periods.
z <- fourier(y, K = c(2, 2))

# External regressors; the column names are reused for the forecast matrix.
covariates.msts <- cbind(
  Precip = dat$Precip,
  Connections = dat$Connections,
  holiday = holiday
)

#main model
# seasonal = FALSE: seasonality is carried by the Fourier regressors, so the
# ARIMA error term is searched without seasonal components.
fit <- auto.arima(y, xreg = cbind(z, covariates.msts), seasonal = FALSE)

# Correlation between observed and fitted values (printed at top level).
cor(dat$Influent.Flow, as.numeric(fit$fitted), use = "pairwise.complete.obs")

# Observed values as points, fitted values as a red line.
x <- seq_along(dat$Influent.Flow)
plot(
  x, dat$Influent.Flow,
  ylim = c(0, 800000),
  main = "Daily Waste Water Flow, Observed and Modeled Values",
  ylab = "Flow (MGD)",
  xlab = "Daily Values, 2006 to February 2017"
)
lines(x, fit$fitted, col = "red")
#forecast between 2/17/17 and 5/23/17
# Builds the future regressor matrix for the `horizon` days that follow the
# last training observation, then forecasts from `fit`.
horizon <- 95

# Forecast step i is the i-th day after the last row of `dat`, so the
# precipitation values must be the ones for exactly those days. Dates are
# built from Year/Month/Day because `precip` may not be stored in
# chronological order.
last_obs <- max(as.Date(ISOdate(dat$Year, dat$Month, dat$Day)), na.rm = TRUE)
precip_date <- as.Date(ISOdate(precip$Year, precip$Month, precip$Day))
future_rows <- which(precip_date > last_obs)
future_rows <- future_rows[order(precip_date[future_rows])]

if (length(future_rows) < horizon) {
  stop(
    "Only ", length(future_rows), " days of precipitation are available ",
    "after ", format(last_obs), "; ", horizon, " are needed.",
    call. = FALSE
  )
}
future_rows <- future_rows[seq_len(horizon)]
if (!all(precip_date[future_rows] == last_obs + seq_len(horizon))) {
  warning(
    "Precipitation records after ", format(last_obs),
    " are not consecutive days; forecast regressors may be misaligned.",
    call. = FALSE
  )
}
p <- precip$Precip[future_rows]

# Named arguments are used instead of `<-` inside cbind(), so the global
# `holiday` vector and the columns of `dat` are not overwritten or masked.
# Connections is held at its historical maximum; no holidays are flagged.
covariates.forecast <- cbind(
  Precip = p,
  Connections = rep(max(dat$Connections), horizon),
  holiday = rep(0, horizon)
)
colnames(covariates.forecast) <- colnames(covariates.msts)

# Fourier terms continued `horizon` steps past the end of y, with the same K
# as the training terms.
zf <- fourier(y, K = c(2, 2), h = horizon)
fc <- forecast(fit, xreg = cbind(zf, covariates.forecast), h = horizon)
> head(MainData)
Date Month Day Year Influent.Flow Connections
1 1/1/06 1 1 2006 141166 484672
2 1/2/06 1 2 2006 99883 484672
3 1/3/06 1 3 2006 108132 484672
4 1/4/06 1 4 2006 88356 484672
5 1/5/06 1 5 2006 89273 484672
6 1/6/06 1 6 2006 71614 484672
> head(PrecipR)
Year Month Day Hour Precip
1 2006 1 1 0 0.03
3 2006 1 1 1 0.02
4 2006 1 1 2 0.01
5 2006 1 1 3 0.00
6 2006 1 1 4 0.00
8 2006 1 1 5 0.02
答案 0 :(得分:0)
我自己解决了这个问题。模型过拟合了。我将傅里叶项的数量(K)从 2 减少到 1,这只使拟合值与观测值之间的相关系数从 0.986 略微下降到 0.983。现在预测结果合理了。新结果的链接见下文。