插入包中的用户定义度量

时间:2014-03-16 08:53:56

标签: r

我希望使用带有度量值的插入符号包,该度量值不是默认选项之一。对于下面的示例,我使用Metrics包。我已经阅读了StackOverflow上的所有相关问题以及插入符号网站上的指南,但仍然收到错误。

在下面的示例中,我希望使用平均绝对误差。

创建一个函数:

maefunction<-function(data, lev=NULL, model=NULL){
  require(Metrics)
  MAE<-mae(data[, "obs"], data[, "pred"])
  out<-c(MAE)
  out
}

现在我将该函数插入trainControl

library(caret)
GBM<-train(train$result~., data=train, method="gbm", trControl=trainControl(summaryFunction=maefunction), metric=MAE)

我收到以下消息

Error in list_to_dataframe(res, attr(.data, "split_labels"), .id, id_as_factor) : 
Results must be all atomic, or all data frames
In addition: Warning messages:
1: In if (metric %in% c("Accuracy", "Kappa")) stop(paste("Metric",  :
  the condition has length > 1 and only the first element will be used
2: In if (metric == "ROC" & !ctrl$classProbs) stop("train()'s use of ROC codes requires                class probabilities. See the classProbs option of trainControl()") :
  the condition has length > 1 and only the first element will be used
3: In if (!(metric %in% perfNames)) { :
  the condition has length > 1 and only the first element will be used
4: In train.default(x, y, weights = w, ...) :
  The metric "4" was not in the result set.  will be used instead.The metric "0.5" was    not in the result set.  will be used instead.

2 个答案:

答案 0 :(得分:11)

我认为您必须使用命名向量(请参阅下面的示例)。我没有在文档中明确说明,所以我将更新该部分。

最高

library(mlbench)
data(BostonHousing)

maeSummary <- function (data,
                        lev = NULL,
                        model = NULL) {
   out <- mae(data$obs, data$pred)  
   names(out) <- "MAE"
   out
}

mControl <- trainControl(summaryFunction = maeSummary)
marsGrid <- expand.grid(degree = 1, nprune = (1:10) * 2)

set.seed(1)
earthFit <- train(medv ~ .,
                  data = BostonHousing, 
                  "earth",
                  tuneGrid = marsGrid,
                  metric = "MAE",
                  maximize = FALSE,
                  trControl = mControl)

答案 1 :(得分:-1)

mae <- function(pred, obs) 
{
  isNA <- is.na(pred)
  pred <- pred[!isNA]
  obs <- obs[!isNA]
  if (!is.factor(obs) & is.numeric(obs)) {
    if (length(obs) + length(pred) == 0) {
      out <- rep(NA, 2)
    }
    else {
      if (length(unique(pred)) < 2 || length(unique(obs)) < 
          2) {
        resamplCor <- NA
      }
      else {
        resamplCor <- try(cor(pred, obs, use = "pairwise.complete.obs"), 
                          silent = TRUE)
        if (class(resamplCor) == "try-error") 
          resamplCor <- NA
      }
      mse <- mean((pred - obs)^2)
      mae <- mean(abs(pred - obs))
      n <- length(obs)
      out <- c(mae, sqrt(mse), resamplCor^2)
    }
    names(out) <- c("MAE", "RMSE", "Rsquared")
  }
  else {
    if (length(obs) + length(pred) == 0) {
      out <- rep(NA, 2)
    }
    else {
      pred <- factor(pred, levels = levels(obs))
      requireNamespaceQuietStop("e1071")
      out <- unlist(e1071::classAgreement(table(obs, pred)))[c("diag", 
                                                               "kappa")]
    }
    names(out) <- c("Accuracy", "Kappa")
  }
  if (any(is.nan(out))) 
    out[is.nan(out)] <- NA
  out
}



MAEFunction <- function (data, lev = NULL, model = NULL) 
{
  if (is.character(data$obs)) 
    data$obs <- factor(data$obs, levels = lev)
  mae(data[, "pred"], data[, "obs"])
}