如何将R中的ARIMA模型与用于创建模型的实际观察结果进行比较?

时间:2015-06-24 15:11:15

标签: r regression simulation

我一直在使用R的forecast包中的auto.arima()函数,为我的时间序列数据拟合ARIMA模型。我想看看ARIMA模型与原始数据的拟合程度如何。我希望在同一张图中绘制原始时间序列和ARIMA模型的模拟结果,看看它们的吻合程度。我该怎么做?

谢谢!

1 个答案:

答案 0 :(得分:0)

我不确定你具体想要什么,但我猜你是想得到一些衡量模型准确性的指标。 您可以在Hyndman和Athanasopoulos的在线书籍中找到关于该主题的详细信息:https://www.otexts.org/fpp/2/5

这是一个基于数据集 "AirPassengers" 的示例:

library(forecast)

# Load the AirPassengers monthly time series used throughout this example.
data(AirPassengers)

# inspect the series: print the values and plot them over time

AirPassengers

plot(AirPassengers)


# keep the data under a shorter, generic name (it is still a `ts` object)

series <- AirPassengers

# The accuracy of forecasts can only be determined by considering how well
# a model performs on new data that were not used when fitting the model.
# The size of the test set is typically about 20% of the total sample;
# here the last 4 of the 12 years (one third) are held out.

# training set
# use data from January 1949 to December 1956 for fitting the models

sr <- window(series, start = c(1949, 1), end = c(1956, 12))

# test set
# use the remaining data from January 1957 to December 1960 to test accuracy

ser <- window(series, start = c(1957, 1), end = c(1960, 12))



######################################################################
# plot training set together with the benchmark forecasts and test set
######################################################################

# By default plot() limits the x axis to the training period, which leaves
# the forecasts and the test set (both after 1956) outside the plotting
# region. Extend both axes so that training and test periods are visible.
plot(sr, main = "AirPassengers", ylab = "", xlab = "Months",
     xlim = range(time(sr), time(ser)),
     ylim = range(sr, ser))

# forecasts over the test period (one per test observation, i.e. 4 years)
# according to four benchmark methods
lines(meanf(sr, h = length(ser))$mean, col = 4)
lines(rwf(sr, h = length(ser))$mean, col = 2)
lines(rwf(sr, drift = TRUE, h = length(ser))$mean, col = 3)
lines(snaive(sr, h = length(ser))$mean, col = 5)

# the test set (observed values); drawn dashed in black so that it cannot be
# confused with the red line of the naive method
lines(ser, col = 1, lty = 2)

# legend
legend("topleft", lty = c(1, 1, 1, 1, 2), col = c(4, 2, 3, 5, 1),
       legend = c("Mean method", "Naive method", "Drift method",
                  "Seasonal naïve method", "Test set"),
       bty = "n")


# Accuracy of the forecasts made from sr (training set), measured against
# ser (test set). The best model is the one with the lowest error
# (particularly the MAPE, mean absolute percentage error).
# The horizon is taken from the test set so that every test observation
# gets exactly one forecast.

# Mean method
accuracy(meanf(sr, h = length(ser)), ser)

# Naive method
accuracy(rwf(sr, h = length(ser)), ser)

# Drift method
accuracy(rwf(sr, drift = TRUE, h = length(ser)), ser)

# Seasonal naïve method
accuracy(snaive(sr, h = length(ser)), ser)



######################################################################
# plot test set only with the predictions
######################################################################

# calculate the point forecasts, one per observation of the test set

sr.mean <- meanf(sr, h = length(ser))$mean
sr.naive <- rwf(sr, h = length(ser))$mean
sr.drift <- rwf(sr, drift = TRUE, h = length(ser))$mean
sr.seas <- snaive(sr, h = length(ser))$mean

# plot the test set
# The y range is derived from everything that is drawn: a fixed
# c(200, 600) cuts off the observed values of the test set above 600.
plot(ser, main = "AirPassengers", ylab = "", xlab = "Months",
     ylim = range(ser, sr.mean, sr.naive, sr.drift, sr.seas))

# plot forecasting for 4 years according to four methods
lines(sr.mean, col = 4)
lines(sr.naive, col = 2)
lines(sr.drift, col = 3)
lines(sr.seas, col = 5)

# legend
legend("topleft", lty = 1, col = c(4, 2, 3, 5),
       legend = c("Mean method", "Naive method", "Drift method",
                  "Seasonal naïve method"),
       bty = "n")



########################################################################
# for ARIMA; Hyndman suggests using auto.arima() without the stepwise
# shortcut
########################################################################

# NOTE(review): auto.arima(), forecast() and tsdisplay() appear to come from
# the 'forecast' package — confirm whether 'fpp' is needed here at all.
library(fpp)

trainData <- sr
testData <- ser

# The default value in auto.arima() is test = "kpss".
# A KPSS test has a null hypothesis of stationarity.
# In general, all the defaults are set to the values that give the best
# forecasts on average.

# CAUTION! Takes a while to compute

arimaMod <- auto.arima(trainData, stepwise = FALSE, approximation = FALSE)

# forecast as many steps ahead as there are observations in the test set
arimaMod.Fr <- forecast(arimaMod, h = length(testData))

# plot of the prediction and of the test set

plot(arimaMod.Fr)
lines(testData, col = "red")
# the labels are passed by name instead of relying on positional matching
legend("topleft", lty = 1, bty = "n", col = c("red", "blue"),
       legend = c("testData", "ARIMAPred"))



# plot of the test set and its prediction only

# reuse the point forecasts stored in arimaMod.Fr instead of running
# forecast() on the model a second time
AR.mean <- arimaMod.Fr$mean

# the y range covers both series so that forecasts lying above or below
# the observed values are not cut off
plot(testData, main = "AirPassengers", ylab = "", xlab = "Months",
     col = "darkblue", ylim = range(testData, AR.mean))
lines(AR.mean, col = "red")
legend("topleft", lty = 1, bty = "n", col = c("darkblue", "red"),
       legend = c("testData", "ARIMAPred"))

# accuracy
# Compare the ARIMA forecasts (arimaMod.Fr) with the held-out test data.

accuracy(arimaMod.Fr,testData)

# residual diagnostics of the fitted ARIMA model
# NOTE(review): the call is kept exactly as written — tsdisplay() appears to
# derive its default plot title from the argument expression; confirm.

tsdisplay(residuals(arimaMod))