我创建了一个多项逻辑回归（multinomial logit）模型来预测冰球比赛的结果。
包
library(tools)
library(utils)
library(dplyr)
library(nnet)
library(VGAM)
library(mlogit)
library(foreign)
数据集
# Sample of 20 home games (HomeTeam is "ANA" in every row), one row per game.
# The dput() output is bound to `NHL6` because the model below is fitted with
# `data = NHL6`; without the assignment that object does not exist.
#
# DEC is the response, a factor with levels "-1", "0", "1".
# Note that in these rows the box-score columns determine the score exactly:
#   totHomeGoals == totHomeShots - totAwaySaves
#   totAwayGoals == totAwayShots - totHomeSaves
#   HomeSH == totHomeGoals / totHomeShots (likewise AwaySH)
NHL6 <- structure(list(GID = 1:20, Date = structure(c(17097, 17100, 17102,
17107, 17109, 17111, 17120, 17122, 17125, 17127, 17130, 17134,
17142, 17144, 17146, 17162, 17167, 17170, 17172, 17174), class = "Date"),
totHomeGoals = c(4L, 6L, 0L, 1L, 5L, 4L, 4L, 3L, 2L, 2L,
2L, 2L, 5L, 3L, 5L, 2L, 3L, 2L, 3L, 1L), totAwayGoals = c(2L,
1L, 4L, 5L, 1L, 1L, 1L, 2L, 3L, 2L, 3L, 1L, 5L, 2L, 1L, 3L,
3L, 0L, 2L, 2L), TOIHome = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 1, 0, 0, 1, 1, 0, 1, 0), TOIAway = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0), DEC = structure(c(3L,
3L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 2L, 1L, 3L, 2L, 3L, 3L, 2L,
2L, 3L, 2L, 1L), .Label = c("-1", "0", "1"), class = "factor"),
totHomeShots = c(37L, 26L, 35L, 33L, 33L, 21L, 27L, 23L,
30L, 41L, 36L, 38L, 38L, 32L, 32L, 36L, 25L, 24L, 35L, 24L
), totHomePP = c(1L, 3L, 0L, 1L, 1L, 2L, 1L, 0L, 1L, 1L,
0L, 1L, 0L, 0L, 3L, 0L, 0L, 0L, 0L, 0L), totAwayShots = c(19L,
29L, 37L, 34L, 22L, 26L, 34L, 29L, 29L, 35L, 25L, 40L, 34L,
24L, 22L, 25L, 55L, 23L, 23L, 36L), totAwayPP = c(0L, 1L,
1L, 1L, 0L, 0L, 0L, 0L, 2L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 1L, 1L), totHomeSaves = c(17L, 28L, 33L, 29L, 21L, 25L,
33L, 27L, 26L, 33L, 22L, 39L, 29L, 22L, 21L, 22L, 52L, 23L,
21L, 34L), totAwaySaves = c(33L, 20L, 35L, 32L, 28L, 17L,
23L, 20L, 28L, 39L, 34L, 36L, 33L, 29L, 27L, 34L, 22L, 22L,
32L, 23L), HomeTeam = c("ANA", "ANA", "ANA", "ANA", "ANA",
"ANA", "ANA", "ANA", "ANA", "ANA", "ANA", "ANA", "ANA", "ANA",
"ANA", "ANA", "ANA", "ANA", "ANA", "ANA"), AwayTeam = c("VAN",
"NSH", "CBJ", "PIT", "ARI", "CGY", "EDM", "NJD", "LAK", "NYI",
"CHI", "MTL", "CAR", "SJS", "OTT", "SJS", "PHI", "DET", "ARI",
"MIN"), HomeSH = c(0.108108108108108, 0.230769230769231,
0, 0.0303030303030303, 0.151515151515152, 0.19047619047619,
0.148148148148148, 0.130434782608696, 0.0666666666666667,
0.0487804878048781, 0.0555555555555556, 0.0526315789473684,
0.131578947368421, 0.09375, 0.15625, 0.0555555555555556,
0.12, 0.0833333333333333, 0.0857142857142857, 0.0416666666666667
), AwaySH = c(0.105263157894737, 0.0344827586206897, 0.108108108108108,
0.147058823529412, 0.0454545454545455, 0.0384615384615385,
0.0294117647058824, 0.0689655172413793, 0.103448275862069,
0.0571428571428571, 0.12, 0.025, 0.147058823529412, 0.0833333333333333,
0.0454545454545455, 0.12, 0.0545454545454545, 0, 0.0869565217391304,
0.0555555555555556), HomeSV = c(0.894736842105263, 0.96551724137931,
0.891891891891892, 0.852941176470588, 0.954545454545455,
0.961538461538462, 0.970588235294118, 0.931034482758621,
0.896551724137931, 0.942857142857143, 0.88, 0.975, 0.852941176470588,
0.916666666666667, 0.954545454545455, 0.88, 0.945454545454545,
1, 0.91304347826087, 0.944444444444444), AwaySV = c(0.891891891891892,
0.769230769230769, 1, 0.96969696969697, 0.848484848484849,
0.80952380952381, 0.851851851851852, 0.869565217391304, 0.933333333333333,
0.951219512195122, 0.944444444444444, 0.947368421052632,
0.868421052631579, 0.90625, 0.84375, 0.944444444444444, 0.88,
0.916666666666667, 0.914285714285714, 0.958333333333333)), .Names = c("GID",
"Date", "totHomeGoals", "totAwayGoals", "TOIHome", "TOIAway",
"DEC", "totHomeShots", "totHomePP", "totAwayShots", "totAwayPP",
"totHomeSaves", "totAwaySaves", "HomeTeam", "AwayTeam", "HomeSH",
"AwaySH", "HomeSV", "AwaySV"), row.names = c(NA, 20L), class = "data.frame")
这是我的模型：
# Multinomial logit for the game decision DEC (levels "-1", "0", "1"),
# fitted with nnet::multinom() on the NHL6 data frame.
#
# NOTE(review): every predictor is a box-score statistic from the very game
# being predicted. In the data shown, goals equal shots minus the opponent's
# saves and HomeSH/AwaySH equal goals / shots, so the predictors effectively
# encode the final score. With only 20 rows this presumably lets the fit
# separate the classes (almost) perfectly, which would explain fitted
# probabilities of ~0 or ~1. Consider predictors known before the game
# (e.g. averages over previous games) -- TODO confirm with the data owner.
#
# `HomeSH * totHomeShots` expands to both main effects plus their
# interaction, so the main effects listed earlier are not added twice.
Kolzig <- multinom(
  formula = DEC ~ totHomeShots + totHomePP + totAwayShots + totAwayPP +
    totHomeSaves + totAwaySaves +
    HomeSH * totHomeShots +
    AwaySH * totAwayShots +
    HomeSV + AwaySV,
  data = NHL6
)
然后我使用 predict() 函数得到各结果的预测概率。
# Class probabilities from the fitted model, one column per DEC level.
# NOTE(review): no `newdata` is supplied, so these are presumably in-sample
# fitted probabilities for the rows the model was trained on, not
# out-of-sample forecasts -- evaluate on held-out games instead.
Kolzig.pred <- predict(object = Kolzig, type = "probs")
然而,结果显然不正确。
-1 0 1
1 7.348283e-23 5.738844e-06 9.999943e-01
2 6.908534e-58 2.563978e-23 1.000000e+00
3 1.000000e+00 1.217702e-18 4.799552e-46
4 1.000000e+00 4.093737e-19 1.608055e-46
5 4.937595e-46 2.689526e-17 1.000000e+00
许多比赛的预测都给某一个结果接近 100% 的概率，而且恰好就是实际发生的结果。我应该修改哪里？