ARMA部件过度拟合ARMA-GARCH模型拟合通过fGarch包

时间:2016-03-30 14:35:31

标签: r time-series quantitative-finance

当我尝试在“自动”模式下调整ARMA-GARCH模型时,我遇到了问题。我尝试使用相同的信息标准选择最佳模型(我使用BIC和AIC)。 所以,我的算法“在指甲上”:

1)定义ARMA(p,q)-GARCH(e,sigma)模型的max p,max q,max e,max sigma(length)值。

2)创建(0:max p) - (0:max q) - (1:max P) - (0:max sigma)的所有组合(如1-1-2-1等)

3)针对6个主要分布的每组4个拟合ARMA-GARCH模型:norm,snorm,std,sstd,ged,sged。

4)对于p.3中的所有拟合模型,得到BIC,并在这6个模型中选择具有最小BIC的模型(针对不同的分布)。

5)对于所有四个组,使用p.3-4给出的聚合模型。

6)在第5页中选择模型组中具有最低BIC的模型

所以,当我这样做时,我有一个很大的问题 - 这个算法选择具有最大AR和MA部分的模型。并且所有ar和ma系数都很重要!!

这里是我的算法的完整列表。

主要脚本:

library(parallel);
library(fGarch);
library(broman);

maxThreads <- max(1, detectCores() - 1);
internalCluster <- makeCluster(maxThreads);
clusterEvalQ(internalCluster, { library(fGarch); library(Rsolnp); });

setwd("C:/Users/Rebelion/Desktop/Model 2.0")

GetBestGARCHModel      = source("GetBestGARCHModel.R")$value;
GetEmpiricalQuantiles  = source("GetEmpiricalQuantiles.R")$value;
GetGARCHFormula        = source("GetGARCHFormula.R")$value;
GetGARCHQuantileValues = source("GetGARCHQuantileValues.R")$value;
GetModelFitResult      = source("GetModelFitResult.R")$value;
ModelNodeCalculation   = source("ModelNodeCalculation.R")$value;
CheckName   = source("CheckName.R")$value;
GetGARCHBestDistribModel         = source("GetGARCHBestDistribModel.R")$value;

RNGkind("L'Ecuyer-CMRG");
set.seed(round(10000*runif(1)));
(s <- .Random.seed);

N <- 200;

spec = garchSpec(model = list());
z <- garchSim(spec, n = N)
data <- z;

probabilities <- c(0.01, 0.05, 0.95, 0.99);

clusterExport(internalCluster, c("GetGARCHBestDistribModel"));
clusterExport(internalCluster, c("GetGARCHFormula"));
clusterExport(internalCluster, c("ModelNodeCalculation"));

result <- GetModelFitResult(data , probabilities,  0.95, 5, 3, internalCluster);

if (!is.null(result)) 
{
    lowBound <- result$boundaries[[2]];
    highBound <- result$boundaries[[3]];

    plot(sapply(1:length(data), function(i) { data[[i]]; }), type="l");
    lines(sapply(1:length(result$bestModel@fitted), function(i) { result$bestModel@fitted[[i]]; }));
    lines(sapply(1:length(lowBound), function(i) { lowBound[[i]]; }), col = "red");
    lines(sapply(1:length(highBound), function(i) { highBound[[i]]; }), col = "blue");

    result$bestModel
} else 
{
    cat("Error in GARCH estimation \n");
}

stopCluster(internalCluster);

nextRNGStream(s);
nextRNGSubStream(s);

GetModelFitResult

GetModelFitResult <- function(timeSeries, boundaryProbabilities, parametersConfidence = 0.95, maxARMAPart, maxGARCHPart, cl = NULL)
{
    bestModel <- GetBestGARCHModel(timeSeries, maxARMAPart, maxGARCHPart, parametersConfidence, cl);

    if (!identical(bestModel$criterion, Inf))
    {
      print(bestModel$model)

      quantilesList <- GetGARCHQuantileValues(bestModel$model, boundaryProbabilities);

      print(quantilesList$quantiles);

      boundaries <- lapply(quantilesList$quantiles, function(val) { bestModel$model@fitted + val*bestModel$model@sigma.t; });
      names(boundaries) <- boundaryProbabilities;

      result <- list(quantiles = quantilesList$quantiles, boundaries = boundaries, bestModel = bestModel$model, criterion = bestModel$criterion);
      return (result);
    }
    else
    {
      result <- NULL;
    }
}

GetBestGARCHModel

GetBestGARCHModel <- function(timeSeries, maxARMAModelSize, maxGARCHModelSize, confidence, cl)
{
  ARParam <- MAParam <- 0:maxARMAModelSize;
  sigmaParam <- 0:maxGARCHModelSize; 
  eParam <- 1:maxGARCHModelSize;

  gridVal <- expand.grid(ARParam, MAParam, eParam, sigmaParam);

  parametersList <- lapply(length(gridVal[,1]):1, function(i)
  { 
    return (sapply(1:4, function(j, k = i) { return (gridVal[k,][[j]]); }));
  });

  if (!is.null(cl))
  {
      bestLocalModels <- parLapplyLB(cl, parametersList, GetGARCHBestDistribModel, data = timeSeries, criterionType = "BIC");
  }
  else
  {
        bestLocalModels <- lapply(parametersList, GetGARCHBestDistribModel, data = timeSeries, criterionType = "BIC");    
  }

  criterionList <- sapply(bestLocalModels, function(mod) {  return(mod$criterion); } );
  bestModelIndex <- which.min(criterionList);

  if (!identical(criterionList[bestModelIndex], Inf)) { bestModel <- bestLocalModels[[bestModelIndex]]$model; }
  else { bestModel <- NULL; }

  result <- list(model = bestModel, criterion = bestLocalModels[[bestModelIndex]]$criterion);

  return(result);
}

GetGARCHBestDistribModel

GetGARCHBestDistribModel <- function(data, modelParams, criterionType, confidence = 0.95)
{
    garchFormula <- GetGARCHFormula(c(modelParams[1], modelParams[2]), c(modelParams[3], modelParams[4]));
    distribs <- c("norm", "snorm", "std", "sstd", "ged", "sged");

    fittedModels <- lapply(distribs, ModelNodeCalculation, data = data, garchFormula = garchFormula);

    names(fittedModels) <- distribs;

    fittedModelIC <- sapply(fittedModels, function(model) 
    { 
        if (!is.null(model)) { result <- model@fit$ics[[criterionType]]; } 
        else { result <- Inf; } 

        return(result); 
    });

    bestModelIndex <- which.min(fittedModelIC);
    bestModelIC <- min(fittedModelIC);

    if (!identical(bestModelIC, Inf)) 
    { 
        bestModel <- fittedModels[[bestModelIndex]];

        pValues <- bestModel@fit$matcoef[, "Pr(>|t|)"];

          includeMean <- !is.na(pValues[["mu"]]) & (pValues[["mu"]] < 1 - confidence);
        includeLeverage <- !is.na(pValues[["gamma1"]]) & (pValues[["gamma1"]] < 1 - confidence);
        #includeDelta <- !is.na(pValues[["delta"]]) & (pValues[["delta"]] < 1 - confidence);

        model <- try(garchFit(garchFormula, data, cond.dist=bestModel@fit$params$cond.dist, trace=FALSE, 
                              include.mean = includeMean, leverage = includeLeverage), silent = TRUE);

        if (inherits(model, "try-error")) { model <- bestModel; }
        else { bestModelIC <- model@fit$ics[[criterionType]]; }


        result <- list(model = model, criterion = bestModelIC);
    }
    else 
    { 
        result <- list(model = NULL, criterion = Inf); 
    }

    return(result);
}

ModelNodeCalculation

ModelNodeCalculation <- function(distrib, data, garchFormula)
{
    model <- try(garchFit(garchFormula, data, cond.dist=distrib, trace=FALSE, leverage = TRUE), silent = TRUE);

    if (inherits(model, "try-error")) { model <- NULL; }

    return(model);
}

GetGARCHQuantileValues

GetGARCHQuantileValues <- function(model, probabilities, sd = 1)
{
  xi <- nu <- distrib <- NA;
  mu <- 0;

  coef <- model@fit$matcoef[,1];

  isSkewed <- CheckName(coef, "skew");
  isShaped <- CheckName(coef, "shape");
  isMeaned <- CheckName(coef, "mu");

  distrib <- model@fit$params$cond.dist;

  xi <- switch(1 + as.numeric(any(isSkewed)), 0, coef[["skew"]]);
  nu <- switch(1 + as.numeric(any(isShaped)), 0, coef[["shape"]]);
  mu <- switch(1 + as.numeric(any(isMeaned)), 0, coef[["mu"]]);

  #quantiles <- switch(distrib, norm = qnorm(probabilities, mean = mu, sd = sd), snorm = qsnorm(probabilities, xi = xi, mean = mu, sd = sd), 
  #                            ged = qged(probabilities, nu = nu, mean = mu, sd = sd), sged = qsged(probabilities, nu = nu, xi = xi, mean = mu, sd = sd),
  #                            std = qstd(probabilities, nu = nu, mean = mu, sd = sd), sstd = qsged(probabilities, nu = nu, xi = xi, mean = mu, sd = sd));

  empiricalQuantiles <- GetEmpiricalQuantiles(probabilities, distrib, mean = mu, xi = xi, nu = nu, sd = sd);

  GetGARCHQuantileValues <- list(quantiles = empiricalQuantiles, distrib = distrib, distribParams = c(xi, nu, mu));  
  return(GetGARCHQuantileValues);
}

GetEmpiricalQuantiles

GetEmpiricalQuantiles <- function(probabilities, distrib, mean = 0, sd = 1, xi, nu, count = 1000000)
{
  sampleGeneration <- switch(distrib, norm = rnorm(count, mean = mean, sd = sd),
                                 snorm = rsnorm(count, mean = mean, sd = sd, xi = xi),
                                 ged = rged(count, mean = mean, sd = sd, nu = nu),
                                 sged = rsged(count, mean = mean, sd = sd, xi = xi, nu = nu),
                                 std = rged(count, mean = mean, sd = sd, nu = nu),
                                 sstd = rsged(count, mean = mean, sd = sd, xi = xi, nu = nu));

  sampleQuantiles <- quantileSE(sampleGeneration, p = probabilities, na.rm = TRUE, names = FALSE);
  return (sampleQuantiles["quantile",]);
}

GetGARCHFormula

GetGARCHFormula <- function(armaOrder, garchOrder)
{
  garchFormulaString <- "~ ";

  garchFormulaString <- paste(garchFormulaString, "arma(", armaOrder[1], ", ", armaOrder[2], ") + ", sep = "");
  garchFormulaString <- paste(garchFormulaString, "garch(", garchOrder[1], ", ", garchOrder[2], ")", sep = "");

  garchFormula <- as.formula(garchFormulaString);
  return(garchFormula);
}

检查名

CheckName <- function(inputList, name)
{
  listNames <- names(inputList);  
  result <- any(sapply(1:length(listNames), function(i) { name == listNames[i]; }));

  return (result);
}

因此,例如,一个模拟结果:

> result$bestModel

Title:
 GARCH Modelling 

Call:
 garchFit(formula = garchFormula, data = data, cond.dist = bestModel@fit$params$cond.dist, 
    include.mean = includeMean, leverage = includeLeverage, trace = FALSE) 

Mean and Variance Equation:
 data ~ arma(5, 5) + garch(2, 0)
<environment: 0x00000000082bf868>
 [data = data]

Conditional Distribution:
 ged 

Coefficient(s):
         mu          ar1          ar2          ar3          ar4          ar5          ma1          ma2          ma3          ma4          ma5        omega       alpha1       alpha2        shape  
 5.2245e-04   7.6557e-01  -3.0486e-01   3.6348e-01   2.4784e-01  -7.1259e-01  -9.3906e-01   4.5539e-01  -7.4762e-01  -1.5968e-01   9.8356e-01   4.7787e-06   2.8803e-01   1.0000e-08   2.6449e+00  

Std. Errors:
 based on Hessian 

Error Analysis:
         Estimate  Std. Error    t value Pr(>|t|)    
mu      5.225e-04   1.218e-07   4288.015  < 2e-16 ***
ar1     7.656e-01   2.913e-05  26285.263  < 2e-16 ***
ar2    -3.049e-01   2.918e-05 -10447.691  < 2e-16 ***
ar3     3.635e-01   2.915e-05  12471.080  < 2e-16 ***
ar4     2.478e-01   2.902e-05   8541.232  < 2e-16 ***
ar5    -7.126e-01   2.878e-05 -24762.254  < 2e-16 ***
ma1    -9.391e-01   2.839e-05 -33074.580  < 2e-16 ***
ma2     4.554e-01   2.781e-05  16377.493  < 2e-16 ***
ma3    -7.476e-01   2.693e-05 -27764.536  < 2e-16 ***
ma4    -1.597e-01   2.561e-05  -6235.408  < 2e-16 ***
ma5     9.836e-01   2.359e-05  41695.273  < 2e-16 ***
omega   4.779e-06          NA         NA       NA    
alpha1  2.880e-01   9.373e-02      3.073  0.00212 ** 
alpha2  1.000e-08   5.983e-02      0.000  1.00000    
shape   2.645e+00   6.135e-02     43.115  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1


Log Likelihood:
 913.7433    normalized:  4.568717 

我无法理解它是如何可能的?这个问题可以保存我尝试的所有型号。高斯噪声,ARMA-GARCH模型,纯GARCH模型等

有什么问题?它是在主算法还是代码实现中?

谢谢。

0 个答案:

没有答案