Question

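此分析在最后阶段出错。在 h2o 模型上运行 explain() 函数时，我收到以下错误：“Error: All permutations have no similarity to the original observation. Try setting bin_continuous to TRUE and/or increase kernel_size”（所有排列与原始观测值都没有相似性；请尝试将 bin_continuous 设为 TRUE 和/或增大 kernel_size）。错误信息中的两条建议我都试过了：如果把 bin_continuous 改为 TRUE，lime() 函数本身就无法运行；换用其他核大小也不起作用。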

有没有办法解决这个问题，从而能够用 plot_features() 函数得到结果？

library(readxl)
library(httr)
library(dplyr)
library(h2o)        
library(lime) 


# Download the HR attrition workbook into a temporary file and read it.
# The URL points at an .xlsx workbook, so the temp file gets a matching
# extension.
hr_data_url <- "https://community.watsonanalytics.com/wp-content/uploads/2015/03/WA_FnUseC_-HR-Employee-Attrition.xlsx"
tf <- tempfile(fileext = ".xlsx")
hr_data_response <- GET(hr_data_url, write_disk(tf))

# Stop here with the HTTP error if the download failed, rather than letting
# read_xlsx() fail later on a missing or non-Excel file.
stop_for_status(hr_data_response)

hr_data_raw <- read_xlsx(tf)


# Prepare the modelling data:
#   1. convert character columns to factors,
#   2. move the target column `Attrition` to the front (later steps drop
#      column 1 to obtain the predictors),
#   3. drop every column that holds a single distinct value.
# Step 3 matters for lime: a zero-variance feature cannot be scaled when
# permutation distances are computed (bin_continuous = FALSE) and cannot be
# cut into unique bins (bin_continuous = TRUE). This is the likely cause of
# "All permutations have no similarity to the original observation".
hr_data <- hr_data_raw %>%
  mutate_if(is.character, as.factor) %>%
  select(Attrition, everything()) %>%
  select_if(function(col) n_distinct(col) > 1)


# Bring up the H2O connection and turn progress bars off.
h2o.init()
h2o.no_progress()

# Upload the prepared data and split it into three frames: 70% / 15% and the
# remaining 15%. The seed is fixed at 1234.
hr_data_h2o <- as.h2o(hr_data)
split_h2o <- h2o.splitFrame(
  data = hr_data_h2o,
  ratios = c(0.7, 0.15),
  seed = 1234
)

# Give each partition a fixed key.
train_h2o <- h2o.assign(data = split_h2o[[1]], key = "train") # 70%
valid_h2o <- h2o.assign(data = split_h2o[[2]], key = "valid") # 15%
test_h2o <- h2o.assign(data = split_h2o[[3]], key = "test") # 15%


# Response column, and predictors = every other column of the training frame.
y <- "Attrition"
x <- setdiff(names(train_h2o), y)

# Run AutoML with a 30-second runtime budget, using the validation and test
# frames for validation and the leaderboard respectively.
automl_models_h2o <- h2o.automl(
  x = x,
  y = y,
  training_frame = train_h2o,
  validation_frame = valid_h2o,
  leaderboard_frame = test_h2o,
  max_runtime_secs = 30
)

# Keep the leader model from the AutoML result.
automl_leader <- automl_models_h2o@leader


# Build the LIME explainer from the training predictors. Column 1 is dropped
# because it holds the target (`Attrition` was moved to the front earlier).
# TRUE/FALSE are spelled out: `T`/`F` are ordinary variables that can be
# reassigned.
explainer <- lime::lime(
  as.data.frame(train_h2o[, -1]),
  model = automl_leader,
  bin_continuous = FALSE
)

# Explain the first 10 test rows: one label per row, four features each.
explanation <- lime::explain(
  as.data.frame(test_h2o[1:10, -1]),
  explainer = explainer,
  n_labels = 1,
  n_features = 4
)

# NOTE(review): explain() has been observed to stop here with
#   Error: All permutations have no similarity to the original observation.
#   Try setting bin_continuous to TRUE and/or increase kernel_size
# The likely cause is predictors with a single distinct value (zero variance)
# in the data given to lime(); confirm that such columns are removed from the
# data before modelling.
lime::plot_features(explanation)

在 R 中使用 lime 和 h2o 运行 explain() 时出错

0 个答案: