我对R很新,我遇到了找到最佳ARIMA模型的问题。到目前为止,我已经模拟了趋势和季节性组件,现在我想用ARIMA模型对周期性组件进行建模。我希望最后的输出包括时间变量的系数,季节变量以及ARIMA变量。我试过用一个循环来找到最优的ARIMA模型和系数,但我得到这个消息:
“optim中的错误(init [mask],armaCSS,method = optim.method,hessian = FALSE,: 由optim“
提供的非有限值我试过在这里寻找其他答案,但我似乎无法弄清楚我做错了什么。
我已经包含了整个代码,以备必要时使用,但最后在运行循环后会出现错误。
我感谢任何帮助,谢谢!
#clear workspace
rm(list=ls())
#load data
setwd("~/Desktop/CBS/HA almen year 3 /Forecasting /R koder ")
data <- scan("onlineretail.txt")
data <- data[2:69] #cut off first period + two last periods for whole years
T=length(data)
s=4
years=T/s
styear=2000
st=c(styear,1)
data = ts(data,start=st, frequency = s)
plot(data)
summary(data)
#plot shows increasing variance - log transform data
lndata <- log(data)
plot(lndata)
dataTSE = decompose(lndata, type="additive")
plot(dataTSE)
########### Trend ##########
t=(1:T)
t2=t^2
lny <- lndata
lmtrend.model <- lm(lny~t)
summary(lmtrend.model)
#linear trend T_t = 8,97 + 0,039533*TIME - both coefficeients significant
#Project 2, explanation why linear is better than quadratic
qtrend.model <- lm(lny~t+t2)
summary(qtrend.model)
lntrend = fitted(lmtrend.model)
lntrend = ts(lntrend, start=st, frequency = s)
#lntrend2 = fitted(qtrend.model)
#lntrend2 = ts(lntrend2, start=st, frequency = s)
residuals=lny-lntrend
par(mar=c(5,5,5,5))
plot(lny, ylim=c(5,12), main="Log e-commerce retail sales")
lines(lntrend, col="blue")
#lines(lntrend2, col="red")
par(new=T)
plot(residuals,ylim=c(-0.2,0.8),ylab="", axes=F)
axis(4, pretty(c(-0.2,0.4)))
abline(h=0, col="grey")
mtext("Residuals", side=4, line=2.5, at=0)
############# Season #################
#The ACF of the residuals confirms the neglected seasonality, because there
#is a clear pattern for every k+4 lags:
acf(residuals)
#Remove trend to observe seasonal factors without the trend:
detrended = residuals
plot(detrended, ylab="ln sales", main="Seasonality in ecommerce retail sales")
abline(h=0, col="grey")
#We can check out the average magnitude of seasonal factors
seasonal.matrix=matrix(detrended, ncol=s, byrow=years)
SeasonalFactor = apply(seasonal.matrix, 2, mean)
SeasonalFactor=ts(SeasonalFactor, frequency = s)
SeasonalFactor
plot(SeasonalFactor);abline(h=0, col="grey")
#We add seasonal dummies to our model of trend and omit the last quarter
library("forecast")
M <- seasonaldummy(lny)
ST.model <- lm(lny ~ t+M)
summary(ST.model)
#ST.model <- tslm(lny~t+season)
#summary(ST.model)
#Both the trend and seasonal dummies appears highly significant
#We will use a Durbin-Watson test to detect serial correlation
library("lmtest")
dwtest(ST.model)
#The DW value is 0.076396. This is quite small, as the value should be around
2
#and we should therefore try to improve the model with a cyclical component
#I will construct a plot that shows how the model fits the data and
#how the residuals look
lntrend=fitted(ST.model)
lntrend = ts(lntrend, start=st, frequency = s)
residuals=lny-lntrend
par(mar=c(5,5,5,5))
plot(lny, ylim=c(5,12), main="Log e-commerce retail sales")
lines(lntrend, col="blue")
#tell R to draw over the current plot with a new one
par(new=T)
plot(residuals,ylim=c(-0.2,0.8),ylab="", axes=F)
axis(4, pretty(c(-0.2,0.4)))
abline(h=0, col="grey")
mtext("Residuals", side=4, line=2.5, at=0)
############## Test for unit root ############
#We will check if the data is stationary, and to do so we will
#test for unit root.
#To do so, we will perform a Dickey-Fuller test. First, we have to remove
seasonal component.
#We can also perform an informal test with ACF and PACF
#the autocorrelation function shows that the data damps slowly
#while the PACF is close to 1 at lag 1 and then lags become insignificant
#this is informal evidence of unit root
acf(residuals)
pacf(residuals)
#Detrended and deseasonalized data
deseason = residuals
plot(deseason)
#level changes a lot over time, not stationary in mean
#Dickey-Fuller test
require(urca)
test <- ur.df(deseason, type = c("trend"), lags=3, selectlags = "AIC")
summary(test)
#We do not reject that there is a unit root if
# |test statistics| < |critical value|
# 1,97 < 4,04
#We can see from the output that the absolute value of the test statistics
#is smaller than the critical value. Therefore, there is no evidence against
the unit root.
#We check the ACF and PACF in first differences. There should be no
significant lags
#if the data is white noise in first differences.
acf(diff(deseason))
pacf(diff(deseason))
deseasondiff = diff(deseason, differences = 2)
plot(deseasondiff)
test2 <- ur.df(deseasondiff, type=c("trend"), lags = 3, selectlags = "AIC")
summary(test2)
#From the plot and the Dickey-Fuller test, it looks like we need to difference
twice
############# ARIMA model ############
S1 = rep(c(1,0,0,0), T/s)
S2 = rep(c(0,1,0,0), T/s)
S3 = rep(c(0,0,1,0), T/s)
TrSeas = model.matrix(~ t+S1+S2+S3)
#Double loop for finding the best fitting ARIMA model and since there was
#a drift, we include this in the model
best.order <- c(0, 2, 0)
best.aic <- Inf
for (q in 1:6) for (p in 1:6) {
fit.aic <- AIC(arima(lny,order = c(p,2, q),include.mean = TRUE,xreg=TrSeas))
print(c(p,q,fit.aic))
if (fit.aic < best.aic) {
best.order <- c(p, 0, q)
best.arma <- arima(lny,order = c(p, 2, q),include.mean = TRUE,xreg=TrSeas)
best.aic <- fit.aic
}
}
best.order