逻辑回归:eval(family$initialize) 中的错误:y 值必须满足 0 <= y <= 1

时间:2018-04-30 17:19:43

标签: r logistic-regression

我是R的新手并尝试对一组数据运行逻辑模型,但我收到此错误:Error in eval(family$initialize) : y values must be 0 <= y <= 1

# Example data for the question: a 51-row data frame (row.names = c(NA, -51L))
# with columns Age, HS, Income, Black, Female and Sales.
# Age, HS, Income, Black and Female are plain numeric vectors.
# NOTE: Sales is NOT numeric here -- it is a factor (class = "factor") whose
# level labels are the sales figures stored as strings (e.g. "65.5", "82.1").
# NOTE(review): this looks like dput() output of the asker's data -- confirm.
cg <- structure(list(Age = c(27, 22.9, 26.3, 29.1, 28.1, 26.2, 29.1, 26.8, 28.4, 32.3, 25.9, 25, 26.4, 28.6, 27.2, 28.8, 28.7, 27.5, 24.8, 28, 27.1, 29, 26.3, 26.8, 25.1, 29.4, 27.1, 28.6, 27.8, 28, 30.1, 23.9, 30.3, 26.5, 26.4, 27.7, 29.4, 29, 30.7, 29.2, 24.8, 27.4, 28.1, 26.4, 23.1, 26.8, 26.8, 27.5, 30, 27.2, 27.2),
        HS = c(41.3, 66.7, 58.1, 39.9, 62.6, 63.9, 56, 54.6, 55.2, 52.6, 40.6, 61.9, 59.5, 52.6, 52.9, 59, 59.9, 38.5, 42.2, 54.7, 52.3, 58.5, 52.8, 57.6, 41, 48.8, 59.2, 59.3, 65.2, 57.6, 52.5, 55.2, 52.7, 38.5, 50.3, 53.2, 51.6, 60, 50.2, 46.4, 37.8, 53.3, 41.8, 47.4, 67.3, 57.1, 47.8, 63.5, 41.6, 54.5, 62.9),
        Income = c(2948, 4644, 3665, 2878, 4493, 3855, 4917, 4524, 5079, 3738, 3354, 4623, 3290, 4507, 3772, 3751, 3853, 3112, 3090, 3302, 4309, 4340, 4180, 3859, 2626, 3781, 3500, 3789, 4563, 3737, 4701, 3077, 4712, 3252, 3086, 4020, 3387, 3719, 3971, 3959, 2990, 3123, 3119, 3606, 3227, 3468, 3712, 4053, 3061, 3812, 3815),
        Black = c(26.2, 3, 3, 18.3, 7, 3, 6, 14.3, 71.1, 15.3, 25.9, 1, 0.3, 12.8, 6.9, 1.2, 4.8, 7.2, 29.8, 0.3, 17.8, 3.1, 11.2, 0.9, 36.8, 10.3, 0.3, 2.7, 5.7, 0.3, 10.8, 1.9, 11.9, 22.2, 0.4, 9.1, 6.7, 1.3, 8, 2.7, 30.5, 0.3, 15.8, 12.5, 0.6, 0.2, 18.5, 2.1, 3.9, 2.9, 0.8),
        Female = c(51.7, 45.7, 50.8, 51.5, 50.8, 50.7, 51.5, 51.3, 53.5, 51.8, 51.4, 48, 50.1, 51.5, 51.3, 51.4, 51, 50.9, 51.4, 51.3, 51.1, 52.2, 51, 51, 51.6, 51.8, 50, 51.2, 49.3, 51.1, 51.6, 50.7, 52.2, 51, 49.5, 51.5, 51.3, 51, 52, 50.9, 50.9, 50.3, 51.6, 51, 50.6, 51.1, 50.6, 50.3, 51.6, 50.9, 50),
        Sales = structure(c(3L, 28L, 23L, 10L, 31L, 36L, 26L, 41L, 46L, 33L, 19L, 2L, 11L, 36L, 40L, 18L, 21L, 42L, 24L, 37L, 32L, 35L, 38L, 13L, 6L, 28L, 20L, 16L, 45L, 47L, 27L, 4L, 25L, 44L, 7L, 29L, 17L, 43L, 15L, 34L, 12L, 5L, 9L, 14L, 1L, 30L, 35L, 8L, 22L, 14L, 39L),
        .Label = c("65.5", "82.1", "89.8", "90", "92.7", "93.4", "93.8", "96.7", "99.8", "100.3", "102.4", "103.6", "104.3", "106.4", "107.3", "108.1", "108.4", "108.5", "109.9", "111.2", "114", "114.5", "115.2", "115.9", "119", "120", "120.7", "121.3", "121.6", "122.6", "123", "123.5", "123.6", "123.9", "124.3", "124.8", "128.5", "128.6", "132.2", "134.6", "155", "155.8", "157", "172.4", "189.5", "200.4", "265.7" ),class = "factor")),
        .Names = c("Age", "HS", "Income", "Black", "Female", "Sales"), row.names = c(NA, -51L), class = "data.frame")

这是我的模型。我想用以下预测变量来预测销售额(Sales):Female、Age、HS、Income 和 Black。所有列的类型都是 numeric 或 integer。

# Fit a GLM with the binomial family, using Sales as the response and every
# other column of cg as a predictor (the `.` in the formula).
# This is the call the question reports as raising
# "y values must be 0 <= y <= 1"; it is kept unchanged as the reproduction.
model2 <- glm(Sales~., data=cg, family=binomial)

2 个答案:

答案 0 :(得分:2)

出现这个错误,是因为您试图用逻辑回归模型去预测一个本质上并非二元(0/1)的响应变量。请查看 ?family 或此页面 https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html,了解逻辑回归模型对响应变量的各种要求。

引用该帮助文件:

  

对于 binomial 和 quasibinomial 族,响应变量可以用以下三种方式之一指定:

  1. 作为因子:"成功"被解释为因子不处于第一个水平(因此通常是处于第二个水平)。

  2. 作为取值在 0 到 1 之间的数值向量,解释为成功案例所占的比例(案例总数由权重给出)。

  3. 作为两列的整数矩阵:第一列给出成功的次数,第二列给出失败的次数。

如果您对模型有后续问题,可以尝试在Cross Validated处询问。

答案 1 :(得分:0)

要预测像销售额这样的数值型变量,您需要的是线性回归,而不是逻辑回归。请使用 lm(Sales ~ ., ...),而不是 glm(..., family = binomial)。

逻辑回归仅适用于取值在 [0,1] 范围内的响应,例如某个事件的概率(客户购买/违约/响应/取消订阅等,或者属于某个特定群体,如已退休、是车主等),正如 Brian Stamper 的回答所解释的那样。