data1 <- read.csv(file.choose(), header=TRUE, sep=",", dec=",")
data<-ts(data1[,2],start = c(1990,1),frequency=1)
#Plot yrs vs total cases as time series'
plot(data, xlab="Years", ylab = "Total cases",type="o",col="blue",font.lab=2)
# Difference data to make data stationary on mean (remove trend)
plot(diff(data,lag=1),ylab="Differenced Total Cases",col="red",font.lab=2,type="o")
#log transform data to make data stationary on variance
plot(log10(data),ylab="Log (Total Cases)",col="blue",font.lab=2,type="o")
#Difference log transform data to make data stationary on both mean and variance
plot(diff(log10(data),lag=1),ylab="Differenced Log (Total Cases)",col="red",font.lab=2,type="o")
#Plot ACF and PACF to identify potential AR and MA model
par(mfrow = c(1,2))
acf(ts(diff(log10(data))),main="ACF Total Cases")
pacf(ts(diff(log10(data))),main="PACF Total Cases")
#Identification of best fit ARIMA model
ARIMAfit <- auto.arima(log10(data), approximation=FALSE,trace=FALSE)
summary(ARIMAfit)
#Forecast No. of Cases using the best fit ARIMA model
pred <- predict(ARIMAfit, n.ahead = 3)
运行包含预测函数的最后一行时出现此错误:
Error in array(x, c(length(x), 1L), if (!is.null(names(x))) list(names(x), :
'data' must be of a vector type, was 'NULL'
是因为我的数据是表格形式的吗?
以下是数据:
structure(list(Year = 1990:2012, Total.cases = c(135.146, 136.633, 156.307, 152.259, 155.899, 154.554, 179.825, 188.216, 184.277, 159.217, 177.259, 203.859, 246.246, 265.461, 245.09, 217.274, 249.885, 254.159, 242.657, 266.083, 283.69, 314.679, 326.488)), .Names = c("Year", "Total.cases"), class = "data.frame", row.names = c(NA, -23L))