我一直在使用 R 的 forecast 包中的 auto.arima() 函数,将 ARIMA 模型拟合到我的时间序列数据上。我想看看 ARIMA 模型对原始数据的拟合程度如何。我希望在同一张图中绘制原始时间序列和 ARIMA 模型的拟合结果,看看它们的匹配程度。我该怎么做?
谢谢!
答案 0 :(得分:0)
我不确定你具体想要什么,但我猜你是想得到一些衡量模型准确性的指标。你可以在 Hyndman 和 Athanasopoulos 的在线书籍中找到关于该主题的详细信息:https://www.otexts.org/fpp/2/5
这是一个基于数据集 "AirPassengers" 的示例:
library(forecast)
data(AirPassengers)
# Inspect the series: print it (auto-prints when run interactively) and plot it
AirPassengers
plot(AirPassengers)
# Keep the data under a shorter, more convenient name
series <- AirPassengers
# The accuracy of forecasts can only be determined by considering how well a
# model performs on new data that were not used when fitting the model.
# A test set is typically about 20% of the total sample; here the last 4 of
# the 12 available years (one third of the observations) are held out.
# Training set: January 1949 - December 1956, used to fit the models
sr <- window(series, start = c(1949, 1), end = c(1956, 12))
# Test set: January 1957 - December 1960, used only to assess accuracy
ser <- window(series, start = c(1957, 1), end = c(1960, 12))
######################################################################
# plot training set, benchmark forecasts and the held-out test set
######################################################################
# Compute each benchmark forecast once (48 months = 4 years ahead) and reuse
# it for both the plot and the accuracy tables below.
fc_mean <- meanf(sr, h = 48)
fc_naive <- rwf(sr, h = 48)
fc_drift <- rwf(sr, drift = TRUE, h = 48)
fc_seas <- snaive(sr, h = 48)
# plot(sr) on its own would size the axes for the training period only, so
# the forecasts and the test set (which lie after it) would not be visible.
# Extend both axes to cover everything that is drawn.
plot(sr, main = "AirPassengers", ylab = "", xlab = "Months",
     xlim = range(time(sr), time(ser)),
     ylim = range(sr, ser, fc_mean$mean, fc_naive$mean,
                  fc_drift$mean, fc_seas$mean))
# point forecasts of the four benchmark methods
lines(fc_mean$mean, col = 4)
lines(fc_naive$mean, col = 2)
lines(fc_drift$mean, col = 3)
lines(fc_seas$mean, col = 5)
# the test set (actual values); dashed so it is not confused with the
# naive forecast, which is also drawn in red
lines(ser, col = "red", lty = 2)
# legend
legend("topleft", lty = c(1, 1, 1, 1, 2), col = c(4, 2, 3, 5, "red"),
       legend = c("Mean method", "Naive method", "Drift method",
                  "Seasonal naïve method", "Test set"),
       bty = "n")
# accuracy of the forecasts made from sr (training data) against ser (actual
# values); the best model has the lowest error (particularly the MAPE, mean
# absolute percentage error)
# Mean method
accuracy(fc_mean, ser)
# Naive method
accuracy(fc_naive, ser)
# Drift method
accuracy(fc_drift, ser)
# Seasonal naïve method
accuracy(fc_seas, ser)
######################################################################
# plot test set only with the predictions
######################################################################
# point forecasts for the 48 months (4 years) of the test period
sr.mean <- meanf(sr, h = 48)$mean
sr.naive <- rwf(sr, h = 48)$mean
sr.drift <- rwf(sr, drift = TRUE, h = 48)$mean
sr.seas <- snaive(sr, h = 48)$mean
# plot the test set; the y-range is derived from the data because a fixed
# c(200, 600) cuts off the summer 1960 peaks, which exceed 600
plot(ser, main = "AirPassengers", ylab = "", xlab = "Months",
     ylim = range(ser, sr.mean, sr.naive, sr.drift, sr.seas))
# overlay the forecasts of the four benchmark methods
lines(sr.mean, col = 4)
lines(sr.naive, col = 2)
lines(sr.drift, col = 3)
lines(sr.seas, col = 5)
# legend (the test set itself is the black line drawn by plot())
legend("topleft", lty = 1, col = c(1, 4, 2, 3, 5),
       legend = c("Test set", "Mean method", "Naive method", "Drift method",
                  "Seasonal naïve method"),
       bty = "n")
########################################################################
# ARIMA; Hyndman suggests using auto.arima() without the stepwise search
########################################################################
library(fpp)
trainData <- sr
testData <- ser
# The default value in auto.arima() is test = "kpss".
# A KPSS test has a null hypothesis of stationarity.
# In general, all the defaults are set to the values that give the best
# forecasts on average.
# CAUTION! Takes a while to compute
arimaMod <- auto.arima(trainData, stepwise = FALSE, approximation = FALSE)
# Forecast exactly as many periods as the test set contains (48 months here)
arimaMod.Fr <- forecast(arimaMod, h = length(testData))
# plot of the training data, the prediction and the test set
plot(arimaMod.Fr)
lines(testData, col = "red")
legend("topleft", lty = 1, bty = "n", col = c("red", "blue"),
       legend = c("testData", "ARIMAPred"))
# plot of the test set and its prediction only; reuse the point forecasts
# computed above instead of running forecast() a second time
AR.mean <- arimaMod.Fr$mean
plot(testData, main = "AirPassengers", ylab = "", xlab = "Months",
     col = "darkblue",
     # cover both series so the prediction is never clipped
     ylim = range(testData, AR.mean))
lines(AR.mean, col = "red")
legend("topleft", lty = 1, bty = "n", col = c("darkblue", "red"),
       legend = c("testData", "ARIMAPred"))
# accuracy of the ARIMA forecast on the training and the test set
accuracy(arimaMod.Fr, testData)
# residual diagnostics of the fitted ARIMA model
tsdisplay(residuals(arimaMod))