我有一个类别不平衡的数据集(少数类约占 5%),并使用 caret 包中的 “AdaBag” 方法进行训练。
我的数据集(第 90–120 行的 dput 输出):
dput(列表[90:120,])
structure(list(Y = c(0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L), X1 = c(0L, -5L, 0L, 0L, 0L, 0L, 0L, -3L,
0L, 0L, -4L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L,
-1L, -1L, -3L, 0L, 0L, 0L, -3L, 0L), X2 = c(0L, 1L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L,
0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L), X3 = c(0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X4 = c(0L, 0L, 1L,
1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L), X5 = c(1L, 1L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L), X6 = c(0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L), X7 = c(0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L), X8 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X9 = c(1L,
0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L), X10 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X11 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L), X12 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X13 = c(0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X14 = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L), X15 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L), X16 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X17 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X18 = c(0L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L,
1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L), X19 = c(1L,
0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L,
0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 1L), X20 = c(0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L), X21 = c(7.720138889,
2.10625, 6.710416667, 20.91180556, 1.960416667, 2.76875, 1.35,
2.153472222, 2.017361111, 4.422222222, 2.848611111, 57.30972222,
2.613194444, 1.60625, 3.338888889, 3.846527778, 10.67847222,
5.290972222, 2.776388889, 25.92708333, 4.334722222, 4.979861111,
8.701388889, 1.715972222, 41.89166667, 9.657638889, 19.50763889,
1.008333333, 2.595138889, 5.829166667, 6.839583333), X22 = c(1L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X23 = c(66,
60, 67, 47, 55, 79, 75, 74, 55, 55, 55, 76, 55, 55, 80, 44, 80,
55, 55, 78, 55, 55, 65, 55, 67, 90, 55, 55, 55, 35, 75), X24 = c(1L,
1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L,
0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X25 = c(0L,
0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L,
1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), X26 = c(135L,
143L, 137L, 138L, 138L, 137L, 145L, 154L, 135L, 139L, 130L, 136L,
138L, 137L, 134L, 141L, 147L, 142L, 139L, 146L, 150L, 141L, 143L,
132L, 146L, 142L, 148L, 139L, 137L, 151L, 146L), X27 = c(3.4,
3.6, 3.7, 3.4, 3.5, 4.1, 3.6, 3.8, 3.6, 3.9, 3.7, 3.3, 4.5, 3.9,
3.5, 3.4, 3.4, 3.7, 4.6, 3.4, 3.3, 3.4, 3.9, 3.3, 5.5, 4.4, 5.6,
4, 3.5, 3.4, 3.7), X28 = c(315L, 271L, 445L, 416L, 320L, 161L,
434L, 110L, 161L, 259L, 142L, 196L, 159L, 78L, 231L, 299L, 284L,
378L, 266L, 460L, 247L, 357L, 267L, 183L, 308L, 8L, 279L, 107L,
30L, 151L, 324L), X29 = c(155L, 94L, 133L, 141L, 113L, 96L, 40L,
157L, 106L, 121L, 81L, 114L, 76L, 96L, 113L, 126L, 110L, 124L,
96L, 202L, 135L, 118L, 178L, 127L, 141L, 205L, 248L, 107L, 82L,
69L, 149L), X30 = c(9L, 11L, 11L, 11L, 6L, 14L, 7L, 19L, 6L,
10L, 92L, 13L, 7L, 6L, 12L, 12L, 17L, 12L, 7L, 5L, 8L, 12L, 13L,
5L, 13L, 7L, 14L, 15L, 5L, 19L, 17L), X31 = c(9.76, 13.23, 12.53,
14, 14.04, 10.27, 10.19, 2.66, 10.43, 12.53, 21.72, 10.84, 8.98,
3.8, 10.25, 17.86, 10.48, 13.65, 8.11, 7.34, 7.09, 7.55, 5.34,
10.2, 17.59, 8.9, 9.67, 11.6, 4.74, 25.37, 17.87), X32 = c(2.37,
2.48, 2.76, 1.78, 2.12, 2.41, 2.52, 2.92, 1.94, 2.03, 2.52, 2.59,
2.48, 1.55, 2.08, 2.31, 2.78, 1.98, 2.24, 2.22, 2.28, 2.37, 2.18,
2.17, 2.29, 2.2, 2.64, 1.16, 2.37, 2.09, 3.09), X33 = c(10.4,
1, 6.1, 9.5, 18.4, -3.6, 1.4, -0.1, 2.6, 5.4, -8.7, 0.1, 1, 4.8,
8.4, 1.4, -1.4, 1.8, 5.7, 8.2, 11.8, 13.7, 5.5, 5.9, 0.7, 3.2,
3.6, 1.5, 2.1, 21.3, 2.5), X34 = c(1.07, 0.41, 0.76, 0.52, 0.29,
0.28, 0.36, 0.64, 0.73, 0.31, 2.08, 0.47, 0.44, 0.66, 0.82, 0.6,
0.52, 0.53, 0.29, 0.31, 0.66, 0.39, 1.24, 0.58, 0.79, 0.7, 1.18,
0.66, 3.72, 1.5, 0.66), X35 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), X36 = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), X37 = c(4.04, 1.54, 20.34,
0, 0, 5.19, 10.47, 1.82, 4.75, 0, 2.14, 0, 2.46, 5.06, 6.27,
0.89, 5.32, 2.71, 33.2, 0, 0.78, 0, 10.32, 4.62, 0, 0, 0, 10.75,
0, 0.52, 0), X38 = c(0.67, 1.89, 1.36, 0.34, 0.73, 0.94, 1.06,
1.53, 1.05, 0.62, 2.7, 0.88, 0.78, 0.31, 0.47, 0.42, 1.55, 0.6,
0.45, 1.24, 1.63, 0.76, 0.41, 0.57, 5.69, 6.05, 1.29, 1.2, 1.98,
0.67, 2.01), X39 = c(0.01, 0.1, 0.16, 0.03, 0, 0.21, 0.16, 0.25,
0.06, 0, 0.35, 0, 0, 0.04, 0.02, 0.02, 0.13, 0.12, 0.06, 0, 0,
0, 0.01, 0.03, 0.31, 0, 0, 0.12, 0, 0.09, 0), X40 = c(2.7, 3.9,
2, 2.3, 2.1, 2.8, 3, 2.9, 3.9, 3, 2.2, 3, 3.7, 2, 1.6, 2.8, 1,
2.6, 3.1, 2.2, 3.2, 3.3, 1.5, 2.8, 2, 2.3, 2.6, 3.1, 2.5, 1.8,
2.9), X41 = c(7.48, 7.41, 7.49, 7.52, 7.35, 7.28, 7.48, 7.16,
7.42, 7.41, 7.23, 7.49, 7.46, 7.41, 7.42, 7.53, 7.47, 7.42, 7.39,
7.34, 7.4, 7.46, 7.39, 7.39, 7.3, 7.23, 7.44, 7.44, 7.35, 7.46,
7.42), X42 = c(82.2, 59.7, 68.2, 154, 36.6, 213, 136, 48.5, 131,
34.8, 86.5, 105, 74.3, 41, 29, 80.5, 138, 47.7, 271, 34.4, 40.8,
53.2, 127, 54.1, 44.1, 38.9, 48.2, 70.7, 115, 75, 53), X43 = c(60L,
72L, 57L, 41L, 21L, 70L, 120L, 154L, 34L, 21L, 73L, 80L, 28L,
8L, 49L, 34L, 96L, 52L, 51L, 69L, 128L, 81L, 27L, 28L, 381L,
199L, 96L, 67L, 146L, 102L, 141L), X44 = c(95L, 109L, 101L, 94L,
92L, 105L, 112L, 114L, 97L, 98L, 97L, 102L, 106L, 104L, 97L,
109L, 117L, 108L, 100L, 100L, 105L, 94L, 103L, 96L, 103L, 99L,
115L, 104L, 102L, 95L, 111L), X45 = c(10.6, 12.8, 7.8, 8.4, 11.1,
12, 7.9, 8.5, 11.1, 14.8, 9.1, 9.9, 12.1, 8, 10, 11.7, 8, 8.7,
10.6, 8.4, 11.8, 10.6, 9.3, 10.1, 8.3, 7.8, 10, 9.8, 7.7, 12.3,
10.2), X46 = c(33.5, 33, 23.5, 24.9, 34.8, 36.5, 24.8, 26.9,
32.1, 36.9, 27.5, 31.5, 35.7, 22.5, 32.2, 33.5, 24.9, 25.1, 32.1,
28.4, 38.6, 33.8, 27.8, 31, 26.7, 25, 32.8, 27.4, 24.3, 38.4,
32.6), X47 = c(132.2, 132.44, 125.83, 127.11, 118, 141, 144,
135, 122.8, 113.5, 77.67, 117, 119.17, 95.67, 97, 152.29, 96.8,
111.57, 100.67, 73.75, 150, 135, 110.4, 99, 202.4, 151, 167.4,
113, 106.33, 146.5, 161.5), X48 = c(4230L, 2130L, 2330L, 1860L,
1070L, 1770L, 3100L, 1480L, 1600L, 1000L, 60L, 350L, 1650L, 2320L,
1640L, 2030L, 2920L, 1590L, 1825L, 2045L, 2250L, 1300L, 2580L,
1910L, 20L, 80L, 1700L, 3040L, 4750L, 4050L, 1500L), X49 = c(37.96,
37.74, 38.23, 37.74, 37.15, 36.55, 37.61, 38.17, 37.6, 38.14,
37.86, 38.89, 38.58, 37.78, 37.89, 39.18, 37.74, 39.2, 37.63,
36.57, 36.64, 37.64, 38.52, 37.54, 36.67, 37.12, 37.04, 37.64,
37.19, 38.77, 37.03), X50 = c(35.59, 36.74, 21.11, 18.96, 31.7,
35.25, 32.99, 37.19, 21.38, 20.4, 35.21, 36.73, 36.84, 29.88,
18.9, 20.65, 36.61, 34.23, 27.75, 18.82, 21.13, 36.48, 36.08,
20.43, 35.92, 36.24, 21.68, 34.06, 21.02, 34.66, 36.55), X51 = c(0L,
0L, 18L, 1L, 0L, 0L, 2L, 2L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 8L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 1L),
X52 = c(1L, 3L, 0L, 1L, 0L, 5L, 5L, 4L, 0L, 0L, 18L, 6L,
17L, 0L, 13L, 3L, 15L, 0L, 1L, 14L, 0L, 10L, 9L, 1L, 0L,
0L, 8L, 6L, 0L, 22L, 2L), X53 = c(0L, 0L, 1L, 1L, 0L, 0L,
1L, 9L, 1L, 0L, 9L, 5L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 5L, 3L,
1L, 2L, 0L, 1L, 8L, 2L, 2L, 0L, 5L, 1L), X54 = c(1L, 5L,
7L, 4L, 3L, 2L, 4L, 3L, 3L, 13L, 0L, 3L, 10L, 0L, 3L, 1L,
2L, 1L, 0L, 7L, 2L, 19L, 2L, 1L, 2L, 8L, 2L, 5L, 0L, 16L,
6L), X55 = c(22L, 20L, 18L, 19L, 21L, 21L, 19L, 6L, 16L,
11L, 1L, 15L, 12L, 24L, 19L, 21L, 9L, 24L, 23L, 11L, 10L,
4L, 19L, 22L, 22L, 2L, 19L, 17L, 23L, 2L, 13L), X56 = c(2L,
8L, 7L, 10L, 4L, 4L, 6L, 2L, 8L, 12L, 1L, 8L, 6L, 0L, 5L,
4L, 1L, 2L, 0L, 16L, 7L, 8L, 7L, 2L, 4L, 7L, 5L, 3L, 0L,
10L, 7L), X57 = c(12L, 8L, 16L, 20L, 20L, 9L, 11L, 9L, 15L,
1L, 2L, 4L, 7L, 0L, 0L, 19L, 6L, 23L, 14L, 12L, 0L, 2L, 6L,
7L, 21L, 18L, 13L, 10L, 17L, 16L, 0L), X58 = c(15L, 14L,
24L, 19L, 23L, 14L, 8L, 7L, 8L, 5L, 0L, 5L, 16L, 6L, 2L,
22L, 1L, 23L, 13L, 14L, 1L, 15L, 5L, 11L, 23L, 19L, 13L,
10L, 8L, 23L, 20L), X59 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
16L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 1L, 0L,
1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L), X60 = c(0L, 3L, 15L,
8L, 0L, 0L, 0L, 18L, 1L, 4L, 20L, 13L, 6L, 14L, 7L, 20L,
12L, 23L, 1L, 0L, 2L, 0L, 1L, 1L, 0L, 0L, 6L, 5L, 3L, 17L,
0L), X61 = c(6.53, 9.22, 5.78, 5.68, 4.4, 5.23, 4.16, 14.58,
20.73, 8.23, 21.48, 24.18, 7.08, 15.56, 14.73, 4.89, 8.98,
10.02, 10.4, 9.51, 16.5, 4.53, 19.94, 6.16, 4.25, 4.76, 15.54,
12.86, 9.37, 9.66, 3.7)), .Names = c("Y", "X1", "X2", "X3",
"X4", "X5", "X6", "X7", "X8", "X9", "X10", "X11", "X12", "X13",
"X14", "X15", "X16", "X17", "X18", "X19", "X20", "X21", "X22",
"X23", "X24", "X25", "X26", "X27", "X28", "X29", "X30", "X31",
"X32", "X33", "X34", "X35", "X36", "X37", "X38", "X39", "X40",
"X41", "X42", "X43", "X44", "X45", "X46", "X47", "X48", "X49",
"X50", "X51", "X52", "X53", "X54", "X55", "X56", "X57", "X58",
"X59", "X60", "X61"), row.names = 90:120, class = "data.frame")
这是代码:
# Resampling setup for caret::train(): 10-fold cross-validation with SMOTE
# subsampling requested through `sampling`, then an AdaBag fit over caret's
# default tuning grid.
#
# NOTE(review): model selection here uses defaultSummary (Accuracy / Kappa).
# With a ~5% minority class, Accuracy is dominated by the majority class. To
# select on sensitivity / ROC instead, caret needs classProbs = TRUE together
# with summaryFunction = twoClassSummary (or prSummary) and a matching
# `metric` in train(); that in turn requires the outcome factor levels to be
# valid R names (e.g. "neg"/"pos", not "0"/"1"). Left unchanged here so that
# the reported numbers stay comparable.
ctrl <- trainControl(
  method = "cv",
  number = 10,
  # `repeats` is deliberately omitted: it only applies to repeated CV
  # (method = "repeatedcv"); with plain "cv" it is ignored and may trigger a
  # warning in recent caret versions.
  p = 0.80,                      # training fraction for leave-group-out CV; not used by "cv"
  search = "grid",
  # Time-slice options; only meaningful for method = "timeslice".
  initialWindow = NULL,
  horizon = 1,
  fixedWindow = TRUE,
  skip = 0,
  verboseIter = FALSE,
  returnData = TRUE,
  returnResamp = "final",
  savePredictions = "all",       # keep hold-out predictions for every tuning candidate
  classProbs = FALSE,
  summaryFunction = defaultSummary,
  # Options forwarded to preProcess(); the train() call below passes no
  # `preProcess` argument, so these presumably have no effect here.
  preProcOptions = list(thresh = 0.80, ICAcomp = 3, k = 7,
                        freqCut = 90/10, uniqueCut = 10, cutoff = 0.9),
  sampling = "smote",
  selectionFunction = "best",
  index = NULL,
  indexOut = NULL,
  indexFinal = NULL,
  timingSamps = 0,
  predictionBounds = rep(FALSE, 2),
  seeds = NA,
  # Adaptive-resampling options; only meaningful for the adaptive_* methods.
  adaptive = list(min = 5, alpha = 0.05, method = "gls", complete = TRUE),
  trim = FALSE,
  allowParallel = TRUE
)

# AdaBag is a classification-only method, so the outcome must be a factor.
# The sample data stores Y as integer 0/1; as.factor() makes the intent
# explicit and returns an existing factor unchanged.
classifier <- train(
  x = training_set[, -1],
  y = as.factor(training_set[, 1]),
  method = "AdaBag",
  trControl = ctrl
)
结果:
> Accuracy
[1] 74.88%
> Sensitivity
[1] 17.14%
> Specificity
[1] 78.42%
> Precision
[1] 4.65%
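我想提高灵敏度(sensitivity)和精确率(precision),有什么建议吗?是否可以对 FN(假阴性)错误施加更高的惩罚,或者采用代价敏感(cost-sensitive)学习?(我不知道具体该怎么做。)当我使用 C5.0 并结合过采样和提升(boosting)时,得到了更好的结果,但不清楚原因:除了装袋(bagging)之外,两种方法基本相同,而我不认为装袋是造成差异的原因。另外,除了在 sampling 中指定 "smote" 这一类型之外,如何进一步控制过采样的具体参数?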