Question

我是 R 语言的新手，正在尝试学习 tidymodels。

只有在对 iris 数据集使用 glm 时我才会遇到此错误；如果我更换数据集和配方（recipe），glm 可以正常运行，但随后在 kknn 中又开始出现同样的错误。

Warning message:
"All models failed in [fit_resamples()]. See the `.notes` column."
Warning message:
"This tuning result has notes. Example notes on model fitting include:
internal: Error: In metric: `roc_auc`

我检查了.notes，结果如下：

.notes
<chr>
internal: Error: In metric: `roc_auc`
A tibble: 1 × 1 .notes
<chr>
internal: Error: In metric: `roc_auc`
A tibble: 1 × 1

Warning message: All models failed in [fit_resamples()]. See the `.notes` column

按照之前一篇帖子中的建议，我尝试从 GitHub 升级 parsnip 和 tune 软件包，但在安装 tune 包时出错：Warning in install.packages : package ‘tune’ is not available for this version of R

我不确定这是怎么回事，如果有人能帮助我，将不胜感激！！！

版本信息：

-- Attaching packages --------------------------------------- tidyverse 1.3.0 --

v ggplot2 3.3.2     v purrr   0.3.4
v tibble  3.0.4     v dplyr   1.0.2
v tidyr   1.1.2     v stringr 1.4.0
v readr   1.4.0     v forcats 0.5.0

-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()

-- Attaching packages -------------------------------------- tidymodels 0.1.1 --

v broom     0.7.2          v recipes   0.1.14    
v dials     0.0.9          v rsample   0.0.8     
v infer     0.5.3          v tune      0.1.1     
v modeldata 0.0.2          v workflows 0.2.1     
v parsnip   0.1.3.9000     v yardstick 0.0.7     

-- Conflicts ----------------------------------------- tidymodels_conflicts() --
x scales::discard() masks purrr::discard()
x dplyr::filter()   masks stats::filter()
x recipes::fixed()  masks stringr::fixed()
x dplyr::lag()      masks stats::lag()
x yardstick::spec() masks readr::spec()
x recipes::step()   masks stats::step()


Windows 7
platform       x86_64-w64-mingw32          
arch           x86_64                      
os             mingw32                     
system         x86_64, mingw32             
status                                     
major          4                           
minor          0.3                         
year           2020                        
month          10                          
day            10                          
svn rev        79318                       
language       R                           
version.string R version 4.0.3 (2020-10-10)

代码：

library(tidyverse)
library(tidymodels)
library(themis)

# Print the built-in iris data set
iris

# Data split: the split is stratified on Species (strata = Species) and the
# seed makes it reproducible
set.seed(999)

iris_split <- initial_split(iris, strata = Species)

# Extract the training and test portions of the split
iris_train <- training(iris_split)
iris_test <- testing(iris_split)


# Cross Validation: folds are built from the training set only; the seed
# fixes the fold assignment
set.seed(345)

iris_fold <- vfold_cv(iris_train)
print(iris_fold)


# recipe: model Species as a function of every other column in iris_train
iris_rec <- recipe(Species ~., data = iris_train) %>%

  # make sure the training set has equal numbers of each target class
  # (not needed for the iris dataset)
  step_downsample(Species) %>% 

  # normalise the data: centre, scale and Box-Cox every column except Species
  # NOTE(review): Box-Cox is normally applied to strictly positive values, but
  # here it runs after centering -- confirm the intended step order
  step_center(-Species) %>% 
  step_scale(-Species) %>% 
  step_BoxCox(-Species) %>% 

  # train (prep) the recipe
  # NOTE(review): the recipe is prepped before it is passed to add_recipe();
  # confirm whether the workflow expects an untrained recipe instead
  prep()


# Workflow: start an empty workflow and attach the recipe as its preprocessor
iris_wf <- workflow() %>%
    add_recipe(iris_rec)

# logistic regression specification using the "glm" engine
# NOTE(review): logistic_reg() is a two-class model, while the outcome Species
# in iris has three classes -- this looks like the cause of the roc_auc
# failures reported for this script; confirm
glm_spec <- logistic_reg() %>%
  set_engine("glm")


# to do parallel processing
doParallel::registerDoParallel()

# add the model to the workflow and fit it on every fold of iris_fold,
# computing the four listed metrics and keeping the predictions
# (save_pred = TRUE)
glm_rs <- iris_wf %>%
  add_model(glm_spec) %>%
  fit_resamples(
      resamples = iris_fold,
      metrics = metric_set(roc_auc, accuracy, sensitivity, specificity),
      control = control_resamples(save_pred = TRUE)
  )

错误

Warning message:
"All models failed in [fit_resamples()]. See the `.notes` column."
Warning message:
"This tuning result has notes. Example notes on model fitting include:
internal: Error: In metric: `roc_auc`

internal: Error: In metric: `roc_auc`

internal: Error: In metric: `roc_auc`"

# Resampling results
# 10-fold cross-validation 
# A tibble: 10 x 5
   splits           id     .metrics .notes           .predictions
   <list>           <chr>  <list>   <list>           <list>      
 1 <split [102/12]> Fold01 <NULL>   <tibble [1 x 1]> <NULL>      
 2 <split [102/12]> Fold02 <NULL>   <tibble [1 x 1]> <NULL>      
 3 <split [102/12]> Fold03 <NULL>   <tibble [1 x 1]> <NULL>      
 4 <split [102/12]> Fold04 <NULL>   <tibble [1 x 1]> <NULL>      
 5 <split [103/11]> Fold05 <NULL>   <tibble [1 x 1]> <NULL>      
 6 <split [103/11]> Fold06 <NULL>   <tibble [1 x 1]> <NULL>      
 7 <split [103/11]> Fold07 <NULL>   <tibble [1 x 1]> <NULL>      
 8 <split [103/11]> Fold08 <NULL>   <tibble [1 x 1]> <NULL>      
 9 <split [103/11]> Fold09 <NULL>   <tibble [1 x 1]> <NULL>      
10 <split [103/11]> Fold10 <NULL>   <tibble [1 x 1]> <NULL>

（更新）

即使不使用RF计算，也会因Parallel出错

Answer 1

我认为您遇到的这个问题并不是由 Windows 上当前存在的并行处理错误引起的，而是因为您试图用二分类模型（logistic_reg()）去拟合一个多分类（multiclass）问题。

如果您把这个示例改成仅做二分类（例如 setosa 与其他类别），那么它应该可以正常工作：

library(tidymodels)
library(themis)
#> Registered S3 methods overwritten by 'themis':
#>   method               from   
#>   bake.step_downsample recipes
#>   bake.step_upsample   recipes
#>   prep.step_downsample recipes
#>   prep.step_upsample   recipes
#>   tidy.step_downsample recipes
#>   tidy.step_upsample   recipes
#> 
#> Attaching package: 'themis'
#> The following objects are masked from 'package:recipes':
#> 
#>     step_downsample, step_upsample, tunable.step_downsample,
#>     tunable.step_upsample

# Data split
set.seed(999)

# Collapse Species into a two-class outcome (setosa vs. other) so that it
# matches the binary logistic regression fitted later.
# The recoded column is wrapped in factor(): case_when() on its own returns a
# character vector, while classification models and class metrics in
# tidymodels expect a factor outcome. Levels keep the default alphabetical
# order ("other", "setosa").
iris_split <- iris %>%
  mutate(
    Species = factor(
      case_when(
        Species == "setosa" ~ "setosa",
        TRUE ~ "other"
      )
    )
  ) %>%
  initial_split(strata = Species)

# Extract the training and test portions of the stratified split
iris_train <- training(iris_split)
iris_test <- testing(iris_split)


# Cross Validation: folds are built from the training set only; the seed
# fixes the fold assignment
set.seed(345)

iris_fold <- vfold_cv(iris_train)
iris_fold
#> #  10-fold cross-validation 
#> # A tibble: 10 x 2
#>    splits           id    
#>    <list>           <chr> 
#>  1 <split [101/12]> Fold01
#>  2 <split [101/12]> Fold02
#>  3 <split [101/12]> Fold03
#>  4 <split [102/11]> Fold04
#>  5 <split [102/11]> Fold05
#>  6 <split [102/11]> Fold06
#>  7 <split [102/11]> Fold07
#>  8 <split [102/11]> Fold08
#>  9 <split [102/11]> Fold09
#> 10 <split [102/11]> Fold10


# recipe: model Species as a function of every other column in iris_train.
# The recipe is left untrained here (no prep() call) and handed to the
# workflow as-is.
iris_rec <- recipe(Species ~ ., data = iris_train) %>%
  # make sure the training set has equal numbers of each target class
  # (not needed for the iris dataset)
  step_downsample(Species) %>% 
  # normalise the data: centre, scale and Box-Cox every column except Species
  # NOTE(review): Box-Cox is normally applied to strictly positive values, but
  # here it runs after centering -- confirm the intended step order
  step_center(-Species) %>% 
  step_scale(-Species) %>% 
  step_BoxCox(-Species)


# Workflow: start an empty workflow and attach the recipe as its preprocessor
iris_wf <- workflow() %>%
  add_recipe(iris_rec)

# logistic regression specification using the "glm" engine
glm_spec <- logistic_reg() %>%
  set_engine("glm")


# to do parallel processing
doParallel::registerDoParallel()

# add the model to the workflow and fit it on every fold of iris_fold,
# computing the four listed metrics and keeping the predictions
# (save_pred = TRUE); the result is not assigned, so it is printed
iris_wf %>%
  add_model(glm_spec) %>%
  fit_resamples(
    resamples = iris_fold,
    metrics = metric_set(roc_auc, accuracy, sensitivity, specificity),
    control = control_resamples(save_pred = TRUE)
  )
#> Warning: This tuning result has notes. Example notes on model fitting include:
#> preprocessor 1/1, model 1/1: glm.fit: algorithm did not converge, glm.fit: fitted probabilities numerically 0 or 1 occurred
#> preprocessor 1/1, model 1/1: glm.fit: algorithm did not converge, glm.fit: fitted probabilities numerically 0 or 1 occurred
#> preprocessor 1/1, model 1/1: glm.fit: algorithm did not converge, glm.fit: fitted probabilities numerically 0 or 1 occurred
#> # Resampling results
#> # 10-fold cross-validation 
#> # A tibble: 10 x 5
#>    splits           id     .metrics         .notes           .predictions     
#>    <list>           <chr>  <list>           <list>           <list>           
#>  1 <split [101/12]> Fold01 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [12 × 6]>
#>  2 <split [101/12]> Fold02 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [12 × 6]>
#>  3 <split [101/12]> Fold03 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [12 × 6]>
#>  4 <split [102/11]> Fold04 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
#>  5 <split [102/11]> Fold05 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
#>  6 <split [102/11]> Fold06 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
#>  7 <split [102/11]> Fold07 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
#>  8 <split [102/11]> Fold08 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
#>  9 <split [102/11]> Fold09 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
#> 10 <split [102/11]> Fold10 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>

<sup>由 reprex 包（v0.3.0.9001）于 2020-10-22 创建</sup>

关于算法未收敛的警告，是由于重新采样后示例数据集的规模较小所致。

在带有tidymodels的r中：警告消息：“ [[fit_resamples（）]中的所有模型均失败。请参阅`.notes`列。”内部：错误：指标：`roc_auc`

1 个答案: