多项回归中的预测概率不正确

时间:2017-06-20 17:52:13

标签: r logistic-regression prediction multinomial

我创建了一个多项模型来预测曲棍球比赛的结果。

library(tools)
library(utils)
library(dplyr)
library(nnet)
library(VGAM)
library(mlogit)
library(foreign)

数据集

# Sample data (dput() output): 20 games, one row per game, 19 columns.
# Bound to `NHL6` because that is the name the multinom() call below reads
# via `data = NHL6`; without the assignment the literal is only printed and
# the model call fails with "object 'NHL6' not found".
# DEC is a factor with levels "-1", "0", "1".
NHL6 <- structure(list(GID = 1:20, Date = structure(c(17097, 17100, 17102, 
17107, 17109, 17111, 17120, 17122, 17125, 17127, 17130, 17134, 
17142, 17144, 17146, 17162, 17167, 17170, 17172, 17174), class = "Date"), 
totHomeGoals = c(4L, 6L, 0L, 1L, 5L, 4L, 4L, 3L, 2L, 2L, 
2L, 2L, 5L, 3L, 5L, 2L, 3L, 2L, 3L, 1L), totAwayGoals = c(2L, 
1L, 4L, 5L, 1L, 1L, 1L, 2L, 3L, 2L, 3L, 1L, 5L, 2L, 1L, 3L, 
3L, 0L, 2L, 2L), TOIHome = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 
0, 0, 1, 0, 0, 1, 1, 0, 1, 0), TOIAway = c(0, 0, 0, 0, 0, 
0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0), DEC = structure(c(3L, 
3L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 2L, 1L, 3L, 2L, 3L, 3L, 2L, 
2L, 3L, 2L, 1L), .Label = c("-1", "0", "1"), class = "factor"), 
totHomeShots = c(37L, 26L, 35L, 33L, 33L, 21L, 27L, 23L, 
30L, 41L, 36L, 38L, 38L, 32L, 32L, 36L, 25L, 24L, 35L, 24L
), totHomePP = c(1L, 3L, 0L, 1L, 1L, 2L, 1L, 0L, 1L, 1L, 
0L, 1L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L), totAwayShots = c(19L, 
29L, 37L, 34L, 22L, 26L, 34L, 29L, 29L, 35L, 25L, 40L, 34L, 
24L, 22L, 25L, 55L, 23L, 23L, 36L), totAwayPP = c(0L, 1L, 
1L, 1L, 0L, 0L, 0L, 0L, 2L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 
0L, 1L, 1L), totHomeSaves = c(17L, 28L, 33L, 29L, 21L, 25L, 
33L, 27L, 26L, 33L, 22L, 39L, 29L, 22L, 21L, 22L, 52L, 23L, 
21L, 34L), totAwaySaves = c(33L, 20L, 35L, 32L, 28L, 17L, 
23L, 20L, 28L, 39L, 34L, 36L, 33L, 29L, 27L, 34L, 22L, 22L, 
32L, 23L), HomeTeam = c("ANA", "ANA", "ANA", "ANA", "ANA", 
"ANA", "ANA", "ANA", "ANA", "ANA", "ANA", "ANA", "ANA", "ANA", 
"ANA", "ANA", "ANA", "ANA", "ANA", "ANA"), AwayTeam = c("VAN", 
"NSH", "CBJ", "PIT", "ARI", "CGY", "EDM", "NJD", "LAK", "NYI", 
"CHI", "MTL", "CAR", "SJS", "OTT", "SJS", "PHI", "DET", "ARI", 
"MIN"), HomeSH = c(0.108108108108108, 0.230769230769231, 
0, 0.0303030303030303, 0.151515151515152, 0.19047619047619, 
0.148148148148148, 0.130434782608696, 0.0666666666666667, 
0.0487804878048781, 0.0555555555555556, 0.0526315789473684, 
0.131578947368421, 0.09375, 0.15625, 0.0555555555555556, 
0.12, 0.0833333333333333, 0.0857142857142857, 0.0416666666666667
), AwaySH = c(0.105263157894737, 0.0344827586206897, 0.108108108108108, 
0.147058823529412, 0.0454545454545455, 0.0384615384615385, 
0.0294117647058824, 0.0689655172413793, 0.103448275862069, 
0.0571428571428571, 0.12, 0.025, 0.147058823529412, 0.0833333333333333, 
0.0454545454545455, 0.12, 0.0545454545454545, 0, 0.0869565217391304, 
0.0555555555555556), HomeSV = c(0.894736842105263, 0.96551724137931, 
0.891891891891892, 0.852941176470588, 0.954545454545455, 
0.961538461538462, 0.970588235294118, 0.931034482758621, 
0.896551724137931, 0.942857142857143, 0.88, 0.975, 0.852941176470588, 
0.916666666666667, 0.954545454545455, 0.88, 0.945454545454545, 
1, 0.91304347826087, 0.944444444444444), AwaySV = c(0.891891891891892, 
0.769230769230769, 1, 0.96969696969697, 0.848484848484849, 
0.80952380952381, 0.851851851851852, 0.869565217391304, 0.933333333333333, 
0.951219512195122, 0.944444444444444, 0.947368421052632, 
0.868421052631579, 0.90625, 0.84375, 0.944444444444444, 0.88, 
0.916666666666667, 0.914285714285714, 0.958333333333333)), .Names = c("GID", 
"Date", "totHomeGoals", "totAwayGoals", "TOIHome", "TOIAway", 
"DEC", "totHomeShots", "totHomePP", "totAwayShots", "totAwayPP",  
"totHomeSaves", "totAwaySaves", "HomeTeam", "AwayTeam", "HomeSH", 
"AwaySH", "HomeSV", "AwaySV"), row.names = c(NA, 20L), class = "data.frame")

这是我的模型:

    # Fit a multinomial logit for the game decision `DEC` on shot, power-play,
    # save, shooting-percentage and save-percentage columns of `NHL6`.
    # In a formula `a * b` expands to `a + b + a:b`, so the shot counts that
    # are listed on their own and again inside an interaction enter only once.
    # NOTE(review): if `NHL6` is the 20-row data frame shown in this post, the
    # 12 terms plus intercept give 13 coefficients per non-reference level of
    # DEC (26 in total) for 20 rows, and totHomeShots - totAwaySaves equals
    # totHomeGoals (likewise for the away side), so the predictors encode the
    # result. Fitted probabilities near 0 or 1 would then be expected; confirm.
    Kolzig <- multinom(
      DEC ~ totHomeShots + totHomePP + totAwayShots + totAwayPP +
        totHomeSaves + totAwaySaves + HomeSH * totHomeShots +
        AwaySH * totAwayShots + HomeSV + AwaySV,
      data = NHL6
    )

然后我使用 predict() 函数计算预测概率。

    # Per-class probabilities from the fitted model, one column per level of
    # the response. No `newdata` is passed, so these are in-sample predictions
    # for the rows the model was fitted on, not out-of-sample forecasts.
    Kolzig.pred <- predict(Kolzig, type = "probs")

然而,结果显然不正确。

-1            0            1
1     7.348283e-23 5.738844e-06 9.999943e-01
2     6.908534e-58 2.563978e-23 1.000000e+00
3     1.000000e+00 1.217702e-18 4.799552e-46
4     1.000000e+00 4.093737e-19 1.608055e-46
5     4.937595e-46 2.689526e-17 1.000000e+00

许多比赛的预测结果显示某一结果的概率接近 100%,而且恰好与实际发生的结果一致。我应该修改什么?

0 个答案:

没有答案