使用foreach并行运行randomForest时predict报错

时间:2017-07-13 14:18:05

标签: r machine-learning foreach

我正在尝试用foreach以多核并行的方式运行randomForest函数。树的拟合似乎可以正常完成,但是在对得到的模型调用predict时,出现了以下错误信息:

Error in UseMethod("predict") : 
  no applicable method for 'predict' applied to an object of class "call"

似乎foreach函数会返回一个简单的list而不是正确的randomForest模型。

以下是我尝试运行的完整代码:

# Accumulators filled in by the cross-validation loop: one predictions data
# frame, one data frame of the matching test-set responses, and the per-fold
# accuracies.
prediction <- data.frame()
testsetCopy <- data.frame()
accuracy <- list()

# Labelled data: the rows of `bundesliga` whose Season is below 2017.
# NOTE(review): `k` (the number of folds) is not defined in this section; it
# must already exist when this code runs.
labeled_data <- bundesliga[bundesliga$Season < 2017, ]

# Give every observation a fold id, drawn from 1..k with replacement (one draw
# per row of the data).
labeled_data[["id"]] <- sample(1:k, size = nrow(labeled_data), replace = TRUE)

# Vector of all fold ids. The name shadows base::list as a variable only;
# calls such as list() still find the function.
list <- 1:k

# Text progress bar with k steps, used to follow the status of the CV.
progress.bar <- create_progress_bar("text")
progress.bar$init(k)

# Model formula handed to randomForest(): the response is `as.factor(FTR)` and
# the right-hand side is every predictor column named below, joined with "+".
rf.formula <- as.formula(
  paste(
    "as.factor(FTR)",
    "~",
    paste(
      c(
        "AvgAgeHome",
        "AvgAge_Away",
        "AvgMarketValueHome_z_score",
        "AvgMarketValue_Away_z_score",
        "ForeignPlayersHome",
        "ForeignPlayers_Away",
        "KaderHome",
        "Kader_Away",
        "no_won_matches_last_20_home",
        "no_won_matches_last_20_away",
        "no_won_matches_last_15_home",
        "no_won_matches_last_15_away",
        "no_won_matches_last_10_home",
        "no_won_matches_last_10_away",
        "no_won_matches_last_5_home",
        "no_won_matches_last_5_away",
        "no_won_matches_last_3_home",
        "no_won_matches_last_3_away",
        "no_won_matches_last_2_home",
        "no_won_matches_last_2_away",
        "won_last_1_matches_away",
        "won_last_1_matches_home",
        "OverallMarketValueHome_z_score",
        "OverallMarketValue_Away_z_score",
        "roll_FTHG_Home",
        "roll_FTAG_Away",
        "Stadium.Capacity.y",
        "WDL_3_roll_matches_away",
        "WDL_3_roll_matches_home",
        "WDL_2_roll_matches_home",
        "WDL_2_roll_matches_away",
        "WDL_1_roll_matches_home",
        "WDL_1_roll_matches_away"
      ),
      collapse = "+"
    )
  )
)


# k-fold cross-validation. For each fold i: grow a random forest in parallel on
# all other folds, predict fold i, record the fold accuracy, append the
# predictions and the true responses to the accumulators, and print a
# confusion matrix.
# NOTE(review): %dopar% only runs in parallel if a backend has been registered
# beforehand (e.g. doParallel::registerDoParallel()); none is visible here.
for (i in seq_len(k)) {
  # Training set: rows whose fold id is anything but i.
  # Test set: rows whose fold id is i.
  trainingset <- subset(labeled_data, id %in% list[-i])
  testset <- subset(labeled_data, id %in% c(i))

  # Grow 8 forests of 1 tree each on the workers and merge them into a single
  # model. The merge function is namespace-qualified on purpose: a bare
  # `combine` is looked up on the search path, so a package attached after
  # randomForest that also exports `combine` (dplyr, for example) replaces it
  # and the merged result is no longer a randomForest object. That is the
  # likely cause of "no applicable method for 'predict'".
  rf <- foreach(
    ntree = rep(1, 8),
    .combine = randomForest::combine,
    .packages = "randomForest"
  ) %dopar% {
    # Point the formula at the worker's evaluation environment before fitting.
    environment(rf.formula) <- environment()
    randomForest(rf.formula, data = trainingset, ntree = ntree)
  }

  # Fail early, with a readable message, if the merge did not yield a model.
  if (!inherits(rf, "randomForest")) {
    stop(
      "foreach did not return a randomForest object (got class: ",
      paste(class(rf), collapse = "/"), ")",
      call. = FALSE
    )
  }

  # Predict on the whole test set instead of dropping column 1 by position.
  # For a formula-fitted forest, predict() should pick the predictors it needs
  # by name, so the response column does not have to be removed.
  pred <- predict(rf, newdata = testset)
  temp <- as.data.frame(pred)

  match_test_pred <- cbind(as.data.frame(testset), temp)

  # The original summed a `Correct` column that nothing here creates, which
  # made every fold accuracy 0. Derive it from prediction vs. true response.
  # Compared as character so differing factor level sets cannot raise an error.
  match_test_pred$Correct <- as.character(pred) == as.character(testset$FTR)
  accuracy_fold <- sum(match_test_pred$Correct) / nrow(match_test_pred)
  accuracy <- rbind(accuracy, accuracy_fold)

  # Append this iteration's predictions to the end of the prediction data
  # frame.
  prediction <- rbind(prediction, temp)

  # Append this iteration's true responses to the test set copy data frame
  # (only the FTR column is kept).
  testsetCopy <- rbind(testsetCopy, as.data.frame(testset$FTR))

  # confusionMatrix() needs the reference as a factor with the same levels as
  # the predictions.
  print(confusionMatrix(pred, factor(testset$FTR, levels = levels(pred))))

  progress.bar$step()
}

提前感谢您的帮助!

0 个答案:

没有答案