Broom :: nnet :: multinom模型的数据帧整齐错误

时间:2016-12-16 16:17:12

标签: r dplyr nnet broom

我使用nnet生成多项模型,并为数据集中的每个城市安装了模型。当我尝试对这些模型使用tidy时,我收到以下错误:

Error in probs[i, -1, drop = FALSE] : subscript out of bounds

但是,如果我单独为每个城市生成模型,然后使用tidy我没有收到任何模型的错误。我也可以使用glace而不会出错。

可能导致此错误的原因是什么?

library(broom)
library(dplyr)
library(nnet)

dfstack <- structure(list(Var1 = c(73L, 71L, 66L, 75L, 96L, 98L, 98L, 65L, 
75L, 74L, 71L, 98L, 100L, 87L, 78L, 50L, 73L, 82L, 70L, 70L, 
31L, 34L, 32L, 100L, 100L, 100L, 54L, 51L, 36L, 48L, 66L, 60L, 
59L, 72L, 76L, 90L, 85L, 76L, 55L, 53L, 42L, 54L, 54L, 10L, 34L, 
18L, 6L, 16L, 63L, 41L, 68L, 55L, 52L, 57L, 64L, 61L, 68L, 44L, 
33L, 19L, 38L, 54L, 44L, 87L, 100L, 100L, 63L, 75L, 76L, 100L, 
100L, 64L, 95L, 90L, 99L, 98L, 87L, 62L, 62L, 88L, 79L, 85L), 
    Status = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("A", 
    "B", "C"), class = "factor"), City = structure(c(3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 
    3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L), .Label = c("Denver", "Miami", "NYC"), class = "factor"), 
    ID = structure(c(52L, 63L, 74L, 77L, 78L, 79L, 80L, 81L, 
    82L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 64L, 
    31L, 42L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 32L, 1L, 12L, 
    23L, 25L, 26L, 27L, 28L, 29L, 30L, 2L, 3L, 4L, 5L, 65L, 66L, 
    67L, 68L, 69L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 6L, 
    7L, 8L, 9L, 10L, 11L, 13L, 70L, 71L, 72L, 73L, 75L, 76L, 
    41L, 43L, 44L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 
    24L), .Label = c("Denver1", "Denver10", "Denver11", "Denver12", 
    "Denver13", "Denver14", "Denver15", "Denver16", "Denver17", 
    "Denver18", "Denver19", "Denver2", "Denver20", "Denver21", 
    "Denver22", "Denver23", "Denver24", "Denver25", "Denver26", 
    "Denver27", "Denver28", "Denver29", "Denver3", "Denver30", 
    "Denver4", "Denver5", "Denver6", "Denver7", "Denver8", "Denver9", 
    "Miami1", "Miami10", "Miami11", "Miami12", "Miami13", "Miami14", 
    "Miami15", "Miami16", "Miami17", "Miami18", "Miami19", "Miami2", 
    "Miami20", "Miami21", "Miami3", "Miami4", "Miami5", "Miami6", 
    "Miami7", "Miami8", "Miami9", "NYC1", "NYC10", "NYC11", "NYC12", 
    "NYC13", "NYC14", "NYC15", "NYC16", "NYC17", "NYC18", "NYC19", 
    "NYC2", "NYC20", "NYC21", "NYC22", "NYC23", "NYC24", "NYC25", 
    "NYC26", "NYC27", "NYC28", "NYC29", "NYC3", "NYC30", "NYC31", 
    "NYC4", "NYC5", "NYC6", "NYC7", "NYC8", "NYC9"), class = "factor")), class = "data.frame", row.names = c(NA, -82L), .Names = c("Var1", "Status", "City", "ID"))

Model.List <- dfstack %>% group_by(City) %>% do(modfits = multinom(Status~Var1, data=.))
tidy(Model.List, modfits) # produces error
glance(Model.List, modfits) # no error

# no error when each city on its own
df1 <- dfstack %>% filter(City == "NYC") %>% do(modfit1 = multinom(Status~Var1, data=.))
tidy(df1, modfit1)

df2 <- dfstack %>% filter(City == "Miami") %>% do(modfit1 = multinom(Status~Var1, data=.))
tidy(df2, modfit1)

df3 <- dfstack %>% filter(City == "Denver") %>% do(modfit1 = multinom(Status~Var1, data=.))
tidy(df3, modfit1)

1 个答案:

答案 0 :(得分:1)

不要让我解释原因,但我明白了。

tidy.multinom拨打summary.multinom,呼叫vcov.multinom,呼叫multinomHess。该错误是在multinomHess中生成的,仅在原始调用multinom时未生成Hessian矩阵时运行。也就是说,如果您不打算使用summary对象,则不一定需要花时间计算Hessian矩阵。

出于某种原因,当multinom调用中形成do个对象时,summary.multinom无法计算Hessian矩阵。使用multinom调用Hess = TRUE可以避免这种情况。见下文:

Model.List <- 
  dfstack %>% 
  group_by(City) %>% 
  do(modfits = multinom(Status~Var1, 
                        data=., 
                        Hess = TRUE))
tidy(Model.List, modfits) 
glance(Model.List, modfits) 

在原始代码中,glance没有投放错误,因为glance.multinom不依赖于summary.multinom