bos <- read_csv("boston_train.csv") %>% clean_names()
bos %>%
mutate_if(is.character, factor) -> bos
然后我分割数据并进行k折叠
# -- set a random seed for repeatablity
set.seed(42)
# -- performs our train / test split
split <- initial_split(bos, prop = 0.7)
# -- extract the training data form our bananna split
train <- training(split)
# -- extract the test data
test <- testing(split)
tree_fold <- vfold_cv(train, 10)
sprintf("Train PCT : %1.2f%%", nrow(train)/ nrow(bos) * 100)
sprintf("Test PCT : %1.2f%%", nrow(test)/ nrow(bos) * 100)
我的目标变量是一个连续变量,我需要我的随机森林来做回归问题
# recipe
rf_recipe <- recipe(av_total ~ ., data=train) %>%
step_rm(pid, zipcode) %>%
step_meanimpute(all_numeric(), -all_outcomes()) %>%
step_log(all_numeric()) %>%
step_modeimpute(all_nominal(),-all_outcomes()) %>%
step_dummy(all_nominal(), -all_outcomes())
#tuning parameters
rf_model <- rand_forest(
mtry = tune(),
trees = 10,
min_n= tune()
) %>%
set_engine("ranger",
importance = "permutation") %>%
set_mode("regression")
rf_wf <- workflow() %>%
add_recipe(rf_recipe) %>%
add_model(rf_model)
rf_grid <- grid_random(mtry(c(5,7)),
min_n(c(15,20)),
size = 10)
# do parallel
all_cores <- detectCores(logical = TRUE)
sprintf("# of Logical Cores: %d", all_cores)
cl <- makeCluster(all_cores)
registerDoParallel(cl)
然后我遇到了错误,无论我如何更改食谱或调整过程,仍然存在
set.seed(52)
rf_tune_rs <- rf_wf %>%
tune_grid(
resamples = tree_fold,
grid = rf_grid,
control = control_resamples(save_pred = TRUE)
)
答案 0 :(得分:0)
我通过在食谱中添加step_unknown项来解决此问题