R doParallel:找不到函数(could not find function)

时间:2019-02-14 17:26:29

标签: r for-loop r-caret doparallel

我已经定义了以下函数:

# Fit cross-validated random-forest models for every data set, target and
# "vari" subgroup.
#
# Args:
#   dat: list of data frames; each one is indexed by a `vari` column whose
#     values "a" / "b" are used to split the rows into two subsets.
#   targets: vector of response column names; one formula is built per entry.
#   predictors_name: vector of predictor column names; its length also sets
#     the upper end of the `mtry` tuning grid.
#
# Returns: nothing useful. Each fit is assigned to the local `model`, which is
#   overwritten on the next iteration and never collected or returned.
cv_model <- function(dat, targets, predictors_name){

  # Attach the modelling packages from inside the function.
  library(randomForest)
  library(caret)
  library(MLmetrics)
  library(Metrics)

  # Custom summary function handed to trainControl(); it reads the `obs` and
  # `pred` columns of `data` and returns a named vector of five metrics.
  sumfct <- function(data, lev = NULL, model = NULL){
    # NOTE(review): MAPE() is called without a namespace prefix. This is the
    # function named in the 'couldn't find function "MAPE"' error reported for
    # the parallel run; presumably the PSOCK workers do not have MLmetrics
    # attached -- confirm.
    mape <- MAPE(y_pred = data$pred, y_true = data$obs)
    # NOTE(review): mean() documents `na.rm`, not `na.omit`; `na.rm = TRUE`
    # was presumably intended here -- confirm.
    RMSE <- sqrt(mean((data$pred - data$obs)^2, na.omit = TRUE))
    MAE <- mean(abs(data$obs - data$pred))
    BIAS <- mean(data$obs - data$pred)
    # Presumably caret's R2() (squared correlation variant) -- confirm.
    Rsquared <- R2(pred = data$pred, obs = data$obs, formula = "corr",    na.rm = FALSE)
    c(MAPE = mape, RMSE = RMSE, MAE = MAE, BIAS = BIAS, Rsquared = Rsquared)
  }

  # Outer loop: one pass per data frame in `dat`.
  for (k in 1:length(dat)) {

    # Split the current data frame by `vari` and drop the `vari` column.
    a <- dat[[k]][dat[[k]]$vari == "a", -c(which(names(dat[[k]]) == "vari"))]
    b <- dat[[k]][dat[[k]]$vari == "b", -c(which(names(dat[[k]]) == "vari"))]
    ab <- list(a, b)

    # One model per (target, subset) combination.
    for (i in 1:length(targets)) {
      for (j in 1:length(ab)) {


        # Specify trainControl: 10-fold CV repeated 10 times, grid search,
        # predictions saved, metrics computed by sumfct().
        control <- trainControl(method="repeatedcv", number=10,   repeats=10, search="grid", savePred =T,
                                summaryFunction = sumfct)

        # Candidate mtry values: 1 up to the number of predictor names.
        tunegrid <- expand.grid(mtry=c(1:length(predictors_name)))




        # Reseed before every fit so each train() call starts from the same
        # RNG state.
        set.seed(42)
        # Formula is assembled as "<target> ~ <pred1> + <pred2> + ...".
        model <- train(formula(paste0(targets[i], 
                                      " ~ ", 
                                      paste(predictors_name, sep = '',    collapse = ' + '))),
                       data = ab[[j]],
                       method="rf",
                       ntree = 25, 
                       metric= "RMSE", 
                       tuneGrid=tunegrid, 
                       trControl=control)


      }
    }
  }

}

根据本教程(https://topepo.github.io/caret/parallel-processing.html),只需调用 library(doParallel); cl <- makePSOCKcluster(2); registerDoParallel(cl) 即可将代码并行化。然而,当我按如下方式将该函数与 doParallel 一起使用时:
# Predictors and response used by cv_model() to build the model formula.
predictors_name <- c("Time", "Chick")
targets <- "weight"

# Label rows 1-10 and 320-350 of ChickWeight as group "a", all others as "b".
dat <- as.data.frame(ChickWeight)
dat$vari <- "b"
dat$vari[c(1:10, 320:350)] <- "a"

# Two data sets to iterate over: rows 1-300 and rows 301-500.
d <- list(dat[1:300, ], dat[301:500, ])

## use 2 of the cores
library(doParallel)
cl <- makePSOCKcluster(2)
registerDoParallel(cl)

# Run inside tryCatch() so the worker processes are shut down even when
# cv_model() raises an error; the error itself is still propagated.
result <- tryCatch(
  cv_model(dat = d, targets = targets, predictors_name = predictors_name),
  finally = {
    # end parallel computing
    stopCluster(cl)
    # Fall back to the sequential backend so that later %dopar% calls do not
    # try to use the stopped cluster.
    foreach::registerDoSEQ()
  }
)

运行时出现错误消息:couldn't find function "MAPE"

如何在不使用foreach语法的情况下解决此问题?

1 个答案:

答案 0 :(得分:0)

如果在调用函数时以 package::function 的形式显式指定其所属的包,代码就可以正常运行。也许有更优雅的解决方案,但下面是我让代码无错误运行的方式:

#' Cross-validate random forests for every data set, target and subgroup
#'
#' For each data frame in `dat` the rows are split by the `vari` column into
#' the subsets "a" and "b" (the `vari` column itself is dropped), and for each
#' target / subset pair a random forest is tuned with `caret::train()` using
#' 10-fold cross-validation repeated 10 times.
#'
#' All non-base functions are namespace-qualified so that the summary function
#' also resolves on parallel workers where the packages are not attached.
#'
#' @param dat List of data frames, each containing a `vari` column plus the
#'   target and predictor columns.
#' @param targets Character vector of response column names.
#' @param predictors_name Character vector of predictor column names; `mtry`
#'   is tuned over `1..length(predictors_name)`.
#' @return Invisibly, a list with one element per entry of `dat`; each element
#'   is a list named by target, holding a list with the fitted `train` objects
#'   for the subsets `a` and `b`. An empty list when there is nothing to fit.
cv_model <- function(dat, targets, predictors_name) {
  # Nothing to fit: avoid the 1:0 trap of 1:length(x) and return early.
  if (length(dat) == 0L || length(targets) == 0L) {
    return(invisible(list()))
  }

  # Check dependencies without attaching them, so the caller's search path is
  # left untouched.
  required <- c("randomForest", "caret", "MLmetrics")
  available <- vapply(required, requireNamespace, logical(1), quietly = TRUE)
  if (!all(available)) {
    stop(
      "Missing required package(s): ",
      paste(required[!available], collapse = ", "),
      call. = FALSE
    )
  }

  # Summary function evaluated on the hold-out predictions of each resample.
  sumfct <- function(data, lev = NULL, model = NULL) {
    resid <- data$obs - data$pred
    c(
      MAPE = MLmetrics::MAPE(y_pred = data$pred, y_true = data$obs),
      # `na.rm` is the argument mean() understands; the former `na.omit`
      # was silently swallowed by `...` and had no effect.
      RMSE = sqrt(mean(resid^2, na.rm = TRUE)),
      MAE = mean(abs(resid)),
      BIAS = mean(resid),
      Rsquared = caret::R2(
        pred = data$pred, obs = data$obs, formula = "corr", na.rm = FALSE
      )
    )
  }

  # Loop-invariant settings are built once instead of once per model.
  control <- caret::trainControl(
    method = "repeatedcv",
    number = 10,
    repeats = 10,
    search = "grid",
    savePredictions = TRUE,
    summaryFunction = sumfct
  )
  tunegrid <- expand.grid(mtry = seq_along(predictors_name))
  rhs <- paste(predictors_name, collapse = " + ")

  models <- vector("list", length(dat))

  for (k in seq_along(dat)) {
    current <- dat[[k]]
    # A logical mask keeps all columns when `vari` is absent (a negative
    # empty index would drop every column); %in% never yields NA rows.
    keep_cols <- names(current) != "vari"
    subsets <- list(
      a = current[current$vari %in% "a", keep_cols, drop = FALSE],
      b = current[current$vari %in% "b", keep_cols, drop = FALSE]
    )

    per_target <- stats::setNames(vector("list", length(targets)), targets)

    for (i in seq_along(targets)) {
      model_formula <- stats::as.formula(paste0(targets[i], " ~ ", rhs))
      fits <- stats::setNames(vector("list", length(subsets)), names(subsets))

      for (j in seq_along(subsets)) {
        # Reseed before every fit so each model sees the same resampling
        # sequence.
        set.seed(42)
        fits[[j]] <- caret::train(
          model_formula,
          data = subsets[[j]],
          method = "rf",
          ntree = 25,
          metric = "RMSE",
          tuneGrid = tunegrid,
          trControl = control
        )
      }

      per_target[[i]] <- fits
    }

    models[[k]] <- per_target
  }

  invisible(models)
}

# Predictors and response used by cv_model() to build the model formula.
predictors_name <- c("Time", "Chick", "Diet")
targets <- "weight"

# Label rows 1-10 and 320-350 of ChickWeight as group "a", all others as "b".
dat <- as.data.frame(ChickWeight)
dat$vari <- "b"
dat$vari[c(1:10, 320:350)] <- "a"

# Two data sets to iterate over: rows 1-300 and row 301 up to the last row.
d <- list(dat[1:300, ], dat[301:nrow(dat), ])

## use 2 of the cores
library(doParallel)
cl <- makePSOCKcluster(2)
registerDoParallel(cl)

# Run inside tryCatch() so the worker processes are shut down even when
# cv_model() raises an error; the error itself is still propagated.
models <- tryCatch(
  cv_model(dat = d, targets = targets, predictors_name = predictors_name),
  finally = {
    # end parallel computing
    stopCluster(cl)
    # Fall back to the sequential backend so that later %dopar% calls do not
    # try to use the stopped cluster.
    foreach::registerDoSEQ()
  }
)