解释ARIMA加权代码背后的方法

时间:2014-11-10 17:05:56

标签: r time-series weighting

我有一个代码,它运行ARIMA模型,重点放在最近的错误上,它提供了出色的结果,比简单的ARIMA好得多,但我不理解它背后的方法。如果你能理解发生了什么以及为什么以及如何运作那么我真的很感激它:)

我想解释的代码来自#---加权---

suppressMessages(library(lmtest))
suppressMessages(library(tseries))
suppressMessages(library(forecast))
suppressMessages(library(TTR))
#-------------------------------------------------------------------------------
Input.data <- matrix(c("8Q1","8Q2","8Q3","8Q4","9Q1","9Q2","9Q3","9Q4","10Q1","10Q2","10Q3","10Q4","11Q1","11Q2","11Q3","11Q4","12Q1","12Q2","12Q3","12Q4","13Q1","13Q2","13Q3","13Q4","14Q1","14Q2","14Q3",5403.675741,6773.504993,7231.117289,7835.55156,5236.709983,5526.619467,6555.781711,11464.72728,7210.068674,7501.610403,8670.903486,10872.93518,8209.022658,8153.393088,10196.44775,13244.50201,8356.732878,10188.44157,10601.32205,12617.82102,11786.52641,10044.98676,11006.0051,15101.9456,10992.27282,11421.18922,10731.31198),ncol=2,byrow=FALSE)

#-------------------------------------------------------------------------------
# Maximum seasonal differences allowed. For typical series, 0 is recommended.

max.sdiff <- 2 

#-------------------------------------------------------------------------------
# Force seasonality

arima.force.seasonality <- "y"

#-------------------------------------------------------------------------------
# The frequency of the data. 1/4 for QUARTERLY, 1/12 for MONTHLY

Frequency <- 1/4

#-------------------------------------------------------------------------------
# How many quarters/months to forecast

Forecast.horizon <- 4

#-------------------------------------------------------------------------------
# The first date in the series. Use c(8, 1) to denote 2008 q1

Start.date <- c(8, 1)

#-------------------------------------------------------------------------------
# The dates of the forecasts

Forecast.dates <- c("14Q4", "15Q1", "15Q2", "15Q3")

#-------------------------------------------------------------------------------
# Set if the data should be logged. Takes value "s" (lets script choose logging)
#"level" (forces levels) or "log" (forces logs)

force.log <- "s"

#-------------------------------------------------------------------------------
# Selects the data column from Input.data

Data.col <- as.numeric(Input.data[, length(Input.data[1, ])])

#-------------------------------------------------------------------------------
# Turns the Data.col into a time-series

Data.col.ts <- ts(Data.col, deltat=Frequency, start = Start.date)

#-------------------------------------------------------------------------------
# A character vector of the dates from Input.data

Dates.col <- as.character(Input.data[,1])

#-------------------------------------------------------------------------------
# Starts the testing to see if the data should be logged

transform.method <- round(BoxCox.lambda(Data.col.ts, method = "loglik"), 5)

log.values <- seq(0, 0.24999, by = 0.00001)
sqrt.values <- seq(0.25, 0.74999, by = 0.00001)

which.transform.log <- transform.method %in% log.values
which.transform.sqrt <- transform.method %in% sqrt.values

if (which.transform.log == "TRUE"){
  as.log <- "log"
  Data.new <- log(Data.col.ts)
} else {
  if (which.transform.sqrt == "TRUE"){
    as.log <- "sqrt"
    Data.new <- sqrt(Data.col.ts)
  } else {
    as.log <- "no"
    Data.new <- Data.col.ts
  }
}

#----- Weighting ---------------------------------------------------------------
fweight <- function(x){
  PatX <- 0.5+x 
  return(PatX)
}

integ1 <- integrate(fweight, lower = 0.00, upper = 1)

valinteg <- 2*integ1$value 

#Split the integral to several intervals, and pick the weights accordingly

integvals <- rep(0, length.out = length(Data.new))
for (i in 1:length(Data.new)){
  integi <- integrate(fweight, lower = (i-1)/length(Data.new), upper= i/length(Data.new))
  integvals[i] <- 2*integi$value
}

suppressWarnings(kpssW <- kpss.test(Data.new, null="Level"))

suppressWarnings(ppW <- tryCatch({
  ppW <- pp.test(Data.new, alternative = "stationary")},
  error = function(ppW){
    ppW <- list(error = "TRUE", p.value = 0.99)
    }))

suppressWarnings(adfW <- adf.test(Data.new, alternative = "stationary",
                                  k = trunc((length(Data.new) - 1)^(1/3))))

suppressWarnings(if (kpssW$p.value < 0.05 |
                       ppW$p.value > 0.05 |
                       adfW$p.value > 0.05){
  ndiffsW = 1
  } else {
    ndiffsW = 0
    })

aaw <- auto.arima(Data.new,
                  max.D         = max.sdiff,
                  d             = ndiffsW,
                  seasonal      = TRUE,
                  allowdrift    = FALSE,
                  stepwise      = FALSE,
                  trace         = FALSE,
                  seasonal.test = "ch")

order.arima <- c(aaw$arma[1], aaw$arma[6] , aaw$arma[2])

order.seasonal.arima <- c(aaw$arma[3], aaw$arma[7], aaw$arma[4])

if (sum(aaw$arma[1:2]) == 0){
  order.arima[1] <- 1
  } else {
    NULL
  }

if (arima.force.seasonality == "y"){
  if(sum(aaw$arma[3:4]) == 0){
    order.seasonal.arima[1] <- 1
    } else {
      NULL
    }
  } else {
    NULL
  }

#----- ARIMA -------------------------------------------------------------------
# Fits an ARIMA model with the orders set
stAW <- Arima(Data.new,
              order    = order.arima,
              seasonal = list(order = order.seasonal.arima),
              method   ="ML")

parSW <- stAW$coef

WMAEOPT <- function(parSW){
  ArimaW <- Arima(Data.new,
                  order         = order.arima,
                  seasonal      = list(order = order.seasonal.arima), 
                  include.drift = FALSE,
                  method        = "ML",
                  fixed         = c(parSW))
  errAR <- c(abs(resid(ArimaW)))
  WMAE <- t(errAR) %*% integvals 
  return(WMAE)
}

OPTWMAE <- optim(parSW,
                 WMAEOPT,
                 method = "SANN",
                 set.seed(2),
                 control = list(fnscale = 1, maxit = 5000))

parS3 <- OPTWMAE$par

Arima.Data.new <- Arima(Data.new, order = order.arima, seasonal=list(order=order.seasonal.arima), 
                        include.drift=FALSE, method = "ML", fixed = c(parS3))

0 个答案:

没有答案