我编写了以下函数:
#' Tune random-forest models with repeated cross-validation.
#'
#' Every data frame in `dat` is split by its `vari` column into an "a" and a
#' "b" subset (the `vari` column itself is dropped). For each target and each
#' subset a random forest is tuned over `mtry` with `caret::train()`, using
#' 10-fold cross-validation repeated 10 times and a custom summary function.
#'
#' @param dat List of data frames; each needs a `vari` column plus the
#'   target and predictor columns.
#' @param targets Character vector of response column names.
#' @param predictors_name Character vector of predictor column names.
#' @return Invisibly, a nested list of fitted `train` objects indexed as
#'   `[[k]][[target]][[subset]]`, where `subset` is `"a"` or `"b"`.
cv_model <- function(dat, targets, predictors_name) {
  library(randomForest)
  library(caret)
  library(MLmetrics)
  library(Metrics)

  # Error measures reported for every resample.
  sumfct <- function(data, lev = NULL, model = NULL) {
    err <- data$obs - data$pred
    # NOTE(review): this unqualified call is resolved through the search
    # path. It is found in the calling session because of library(MLmetrics)
    # above, but it is reported to fail with "could not find function" once a
    # PSOCK backend is registered; writing MLmetrics::MAPE(...) avoids that.
    mape <- MAPE(y_pred = data$pred, y_true = data$obs)
    # `na.rm` is the argument mean() understands; `na.omit = TRUE` was
    # silently swallowed by `...` and never removed anything.
    rmse <- sqrt(mean(err^2, na.rm = TRUE))
    mae <- mean(abs(err))
    bias <- mean(err)
    rsq <- caret::R2(
      pred = data$pred, obs = data$obs, formula = "corr", na.rm = FALSE
    )
    c(MAPE = mape, RMSE = rmse, MAE = mae, BIAS = bias, Rsquared = rsq)
  }

  # Resampling set-up and tuning grid do not depend on the loop variables,
  # so they are built once.
  control <- trainControl(
    method = "repeatedcv", number = 10, repeats = 10, search = "grid",
    savePredictions = TRUE, summaryFunction = sumfct
  )
  tunegrid <- expand.grid(mtry = seq_along(predictors_name))

  models <- vector("list", length(dat))
  names(models) <- names(dat)

  for (k in seq_along(dat)) {
    current <- dat[[k]]
    keep_cols <- setdiff(names(current), "vari")
    # %in% never yields NA, so rows with a missing `vari` are dropped
    # instead of turning into all-NA rows.
    subsets <- list(
      a = current[current$vari %in% "a", keep_cols, drop = FALSE],
      b = current[current$vari %in% "b", keep_cols, drop = FALSE]
    )

    models[[k]] <- lapply(stats::setNames(targets, targets), function(target) {
      model_formula <- stats::reformulate(predictors_name, response = target)
      lapply(subsets, function(subset_data) {
        set.seed(42)
        train(
          model_formula,
          data = subset_data,
          method = "rf",
          ntree = 25,
          metric = "RMSE",
          tuneGrid = tunegrid,
          trControl = control
        )
      })
    })
  }

  invisible(models)
}
根据这篇教程(https://topepo.github.io/caret/parallel-processing.html),只需调用 `library(doParallel); cl <- makePSOCKcluster(2); registerDoParallel(cl)` 即可让代码并行运行。
然而,当我像下面这样把该函数与 doParallel 一起使用时:
# Example driver: tune the models on two slices of ChickWeight using a
# two-worker PSOCK cluster.
predictors_name <- c("Time", "Chick")
targets <- "weight"

dat <- as.data.frame(ChickWeight)
# Rows 1-10 and 320-350 form subset "a"; every other row belongs to "b".
a_rows <- c(1:10, 320:350)
dat$vari <- "b"
dat$vari[a_rows] <- "a"
d <- list(dat[1:300, ], dat[301:500, ])

## use 2 of the cores
library(doParallel)
cl <- makePSOCKcluster(2)
registerDoParallel(cl)

# end parallel computing even if cv_model() stops with an error, so the
# worker processes are never left running
tryCatch(
  cv_model(dat = d, targets = targets, predictors_name = predictors_name),
  finally = stopCluster(cl)
)
会出现错误消息:couldn't find function "MAPE"。
如何在不使用 foreach 语法的情况下解决这个问题?
答案 0 :(得分:0)
如果在调用函数时用 `package::function` 的形式显式指定包名,代码就能正常运行。原因在于 PSOCK 工作进程是全新的 R 会话,函数内部的 `library(MLmetrics)` 只在主进程中生效,工作进程的搜索路径里并没有 `MAPE`。也许还有更优雅的解决方案,但下面是我让代码不报错运行的做法:
#' Tune random-forest models with repeated cross-validation.
#'
#' Every data frame in `dat` is split by its `vari` column into an "a" and a
#' "b" subset (the `vari` column itself is dropped). For each target and each
#' subset a random forest is tuned over `mtry` with `caret::train()`, using
#' 10-fold cross-validation repeated 10 times and a custom summary function.
#' Functions from add-on packages are called as `pkg::fun()` so that they
#' are also found when the resampling runs on parallel workers.
#'
#' @param dat List of data frames; each needs a `vari` column plus the
#'   target and predictor columns.
#' @param targets Character vector of response column names.
#' @param predictors_name Character vector of predictor column names.
#' @return Invisibly, a nested list of fitted `train` objects indexed as
#'   `[[k]][[target]][[subset]]`, where `subset` is `"a"` or `"b"`.
cv_model <- function(dat, targets, predictors_name) {
  library(randomForest)
  library(caret)
  library(MLmetrics)
  library(Metrics)

  # Error measures reported for every resample.
  sumfct <- function(data, lev = NULL, model = NULL) {
    err <- data$obs - data$pred
    # Namespace-qualified: the summary function is evaluated on the workers,
    # where the library() calls above have not attached MLmetrics.
    mape <- MLmetrics::MAPE(y_pred = data$pred, y_true = data$obs)
    # `na.rm` is the argument mean() understands; `na.omit = TRUE` was
    # silently swallowed by `...` and never removed anything.
    rmse <- sqrt(mean(err^2, na.rm = TRUE))
    mae <- mean(abs(err))
    bias <- mean(err)
    rsq <- caret::R2(
      pred = data$pred, obs = data$obs, formula = "corr", na.rm = FALSE
    )
    c(MAPE = mape, RMSE = rmse, MAE = mae, BIAS = bias, Rsquared = rsq)
  }

  # Resampling set-up and tuning grid do not depend on the loop variables,
  # so they are built once.
  control <- caret::trainControl(
    method = "repeatedcv", number = 10, repeats = 10, search = "grid",
    savePredictions = TRUE, summaryFunction = sumfct
  )
  tunegrid <- expand.grid(mtry = seq_along(predictors_name))

  models <- vector("list", length(dat))
  names(models) <- names(dat)

  for (k in seq_along(dat)) {
    current <- dat[[k]]
    keep_cols <- setdiff(names(current), "vari")
    # %in% never yields NA, so rows with a missing `vari` are dropped
    # instead of turning into all-NA rows.
    subsets <- list(
      a = current[current$vari %in% "a", keep_cols, drop = FALSE],
      b = current[current$vari %in% "b", keep_cols, drop = FALSE]
    )

    models[[k]] <- lapply(stats::setNames(targets, targets), function(target) {
      model_formula <- stats::reformulate(predictors_name, response = target)
      lapply(subsets, function(subset_data) {
        set.seed(42)
        caret::train(
          model_formula,
          data = subset_data,
          method = "rf",
          ntree = 25,
          metric = "RMSE",
          tuneGrid = tunegrid,
          trControl = control
        )
      })
    })
  }

  invisible(models)
}
# Example driver: tune the models on two slices of ChickWeight using a
# two-worker PSOCK cluster.
predictors_name <- c("Time", "Chick", "Diet")
targets <- "weight"

dat <- as.data.frame(ChickWeight)
# Rows 1-10 and 320-350 form subset "a"; every other row belongs to "b".
a_rows <- c(1:10, 320:350)
dat$vari <- "b"
dat$vari[a_rows] <- "a"
# Second slice runs to the last row instead of a hard-coded row count.
d <- list(dat[1:300, ], dat[301:nrow(dat), ])

## use 2 of the cores
library(doParallel)
cl <- makePSOCKcluster(2)
registerDoParallel(cl)

# end parallel computing even if cv_model() stops with an error, so the
# worker processes are never left running
tryCatch(
  cv_model(dat = d, targets = targets, predictors_name = predictors_name),
  finally = stopCluster(cl)
)