我正在使用 C5.0 构建决策树,但它把我的类别标签也用进了树里。我的数据片段如下。
trainX
V1 V2 V3 V4 V5 V6
1 39 State-gov 77516 Bachelors 13 Never-married
2 50 Self-emp-not-inc 83311 Bachelors 13 Married-civ-spouse
3 38 Private 215646 HS-grad 9 Divorced
4 53 Private 234721 11th 7 Married-civ-spouse
5 28 Private 338409 Bachelors 13 Married-civ-spouse
V7 V8 V9 V10 V11 V12 V13 V14
1 Adm-clerical Not-in-family White Male 2174 0 40 United-States
2 Exec-managerial Husband White Male 0 0 13 United-States
3 Handlers-cleaners Not-in-family White Male 0 0 40 United-States
4 Handlers-cleaners Husband Black Male 0 0 40 United-States
5 Prof-specialty Wife Black Female 0 0 40 Cuba
trainY
[1] <=50K <=50K <=50K <=50K <=50K
我的数据中也有 >50K 的情况,只是这个 5 行的片段中恰好没有包含。
当我创建树时,这是我使用的代码
library(C50)
trainX = X[1:100,]
trainY = Y[1:100]
testX = X[101:150,]
testY = Y[101:150]
model = C5.0(trainX, trainY)
summary(model)
我得到的输出是......
Decision tree:
<=50K (100/25)
Evaluation on training data (100 cases):
Decision Tree
----------------
Size Errors
1 25(25.0%) <<
(a) (b) <-classified as
---- ----
75 (a): class <=50K
25 (b): class >50K
我做错了什么,导致它把分类结果(类别标签)当成了树的一部分?
编辑 - 下面是数据前几行(head)的 dput 输出。问题依旧:它使用 <=50K 或 >50K 作为拆分来构建决策树,而这是我的 "Y" 输出,因此不应该成为决策过程的一部分。
trainX
structure(list(V1 = c(39L, 50L, 38L, 53L, 28L, 37L), V2 = structure(c(8L,
7L, 5L, 5L, 5L, 5L), .Label = c(" ?", " Federal-gov", " Local-gov",
" Never-worked", " Private", " Self-emp-inc", " Self-emp-not-inc",
" State-gov", " Without-pay"), class = "factor"), V3 = c(77516L,
83311L, 215646L, 234721L, 338409L, 284582L), V4 = structure(c(10L,
10L, 12L, 2L, 10L, 13L), .Label = c(" 10th", " 11th", " 12th",
" 1st-4th", " 5th-6th", " 7th-8th", " 9th", " Assoc-acdm", " Assoc-voc",
" Bachelors", " Doctorate", " HS-grad", " Masters", " Preschool",
" Prof-school", " Some-college"), class = "factor"), V5 = c(13L,
13L, 9L, 7L, 13L, 14L), V6 = structure(c(5L, 3L, 1L, 3L, 3L,
3L), .Label = c(" Divorced", " Married-AF-spouse", " Married-civ-spouse",
" Married-spouse-absent", " Never-married", " Separated", " Widowed"
), class = "factor"), V7 = structure(c(2L, 5L, 7L, 7L, 11L, 5L
), .Label = c(" ?", " Adm-clerical", " Armed-Forces", " Craft-repair",
" Exec-managerial", " Farming-fishing", " Handlers-cleaners",
" Machine-op-inspct", " Other-service", " Priv-house-serv", " Prof-specialty",
" Protective-serv", " Sales", " Tech-support", " Transport-moving"
), class = "factor"), V8 = structure(c(2L, 1L, 2L, 1L, 6L, 6L
), .Label = c(" Husband", " Not-in-family", " Other-relative",
" Own-child", " Unmarried", " Wife"), class = "factor"), V9 = structure(c(5L,
5L, 5L, 3L, 3L, 5L), .Label = c(" Amer-Indian-Eskimo", " Asian-Pac-Islander",
" Black", " Other", " White"), class = "factor"), V10 = structure(c(2L,
2L, 2L, 2L, 1L, 1L), .Label = c(" Female", " Male"), class = "factor"),
V11 = c(2174L, 0L, 0L, 0L, 0L, 0L), V12 = c(0L, 0L, 0L, 0L,
0L, 0L), V13 = c(40L, 13L, 40L, 40L, 40L, 40L), V14 = structure(c(40L,
40L, 40L, 40L, 6L, 40L), .Label = c(" ?", " Cambodia", " Canada",
" China", " Columbia", " Cuba", " Dominican-Republic", " Ecuador",
" El-Salvador", " England", " France", " Germany", " Greece",
" Guatemala", " Haiti", " Holand-Netherlands", " Honduras",
" Hong", " Hungary", " India", " Iran", " Ireland", " Italy",
" Jamaica", " Japan", " Laos", " Mexico", " Nicaragua", " Outlying-US(Guam-USVI-etc)",
" Peru", " Philippines", " Poland", " Portugal", " Puerto-Rico",
" Scotland", " South", " Taiwan", " Thailand", " Trinadad&Tobago",
" United-States", " Vietnam", " Yugoslavia"), class = "factor")), .Names = c("V1",
"V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11",
"V12", "V13", "V14"), row.names = c(NA, 6L), class = "data.frame")
trainY
structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c(" <=50K", " >50K"
), class = "factor")
读入 trainX 和 trainY 之后,重现此问题的最简单方法就是:
library(C50)
test = C5.0(x=trainX, y=trainY)
我的实际 trainY:
structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 2L, 1L, 2L, 2L, 1L, 1L), .Label = c(" <=50K", " >50K"), class = "factor")
我的实际 trainX:
structure(list(age = c(39L, 50L, 38L, 53L, 28L, 37L, 49L, 52L,
31L, 42L, 37L, 30L, 23L, 32L, 40L, 34L, 25L, 32L, 38L, 43L, 40L,
54L, 35L, 43L, 59L, 56L, 19L, 54L, 39L, 49L, 23L, 20L, 45L, 30L,
22L, 48L, 21L, 19L, 31L, 48L, 31L, 53L, 24L, 49L, 25L, 57L, 53L,
44L, 41L, 29L, 25L, 18L, 47L, 50L, 47L, 43L, 46L, 35L, 41L, 30L,
30L, 32L, 48L, 42L, 29L, 36L, 28L, 53L, 49L, 25L, 19L, 31L, 29L,
23L, 79L, 27L, 40L, 67L, 18L, 31L, 18L, 52L, 46L, 59L, 44L, 53L,
49L, 33L, 30L, 43L, 57L, 37L, 28L, 30L, 34L, 29L, 48L, 37L, 48L,
32L), workClass = structure(c(8L, 7L, 5L, 5L, 5L, 5L, 5L, 7L,
5L, 5L, 5L, 8L, 5L, 5L, 5L, 5L, 7L, 5L, 5L, 7L, 5L, 5L, 2L, 5L,
5L, 3L, 5L, 1L, 5L, 5L, 3L, 5L, 5L, 2L, 8L, 5L, 5L, 5L, 5L, 7L,
5L, 7L, 5L, 5L, 5L, 2L, 5L, 5L, 8L, 5L, 5L, 5L, 5L, 2L, 6L, 5L,
5L, 5L, 5L, 5L, 5L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 1L, 5L, 5L,
7L, 5L, 5L, 5L, 5L, 1L, 5L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 5L,
5L, 2L, 5L, 5L, 5L, 5L, 3L, 3L, 7L, 5L, 5L, 2L), .Label = c(" ?",
" Federal-gov", " Local-gov", " Never-worked", " Private", " Self-emp-inc",
" Self-emp-not-inc", " State-gov", " Without-pay"), class = "factor"),
fnlwgt = c(77516L, 83311L, 215646L, 234721L, 338409L, 284582L,
160187L, 209642L, 45781L, 159449L, 280464L, 141297L, 122272L,
205019L, 121772L, 245487L, 176756L, 186824L, 28887L, 292175L,
193524L, 302146L, 76845L, 117037L, 109015L, 216851L, 168294L,
180211L, 367260L, 193366L, 190709L, 266015L, 386940L, 59951L,
311512L, 242406L, 197200L, 544091L, 84154L, 265477L, 507875L,
88506L, 172987L, 94638L, 289980L, 337895L, 144361L, 128354L,
101603L, 271466L, 32275L, 226956L, 51835L, 251585L, 109832L,
237993L, 216666L, 56352L, 147372L, 188146L, 59496L, 293936L,
149640L, 116632L, 105598L, 155537L, 183175L, 169846L, 191681L,
200681L, 101509L, 309974L, 162298L, 211678L, 124744L, 213921L,
32214L, 212759L, 309634L, 125927L, 446839L, 276515L, 51618L,
159937L, 343591L, 346253L, 268234L, 202051L, 54334L, 410867L,
249977L, 286730L, 212563L, 117747L, 226296L, 115585L, 191277L,
202683L, 171095L, 249409L), education = structure(c(10L,
10L, 12L, 2L, 10L, 13L, 7L, 12L, 13L, 10L, 16L, 10L, 10L,
8L, 9L, 6L, 12L, 12L, 2L, 13L, 11L, 12L, 7L, 2L, 12L, 10L,
12L, 16L, 12L, 12L, 8L, 16L, 10L, 16L, 16L, 2L, 16L, 12L,
16L, 8L, 7L, 10L, 10L, 12L, 12L, 10L, 12L, 13L, 9L, 9L, 16L,
12L, 15L, 10L, 12L, 16L, 5L, 9L, 12L, 12L, 10L, 6L, 12L,
11L, 16L, 12L, 16L, 12L, 16L, 16L, 16L, 10L, 10L, 16L, 16L,
12L, 8L, 1L, 2L, 6L, 12L, 10L, 12L, 12L, 12L, 12L, 12L, 13L,
7L, 11L, 9L, 16L, 16L, 12L, 10L, 16L, 11L, 16L, 8L, 12L), .Label = c(" 10th",
" 11th", " 12th", " 1st-4th", " 5th-6th", " 7th-8th", " 9th",
" Assoc-acdm", " Assoc-voc", " Bachelors", " Doctorate",
" HS-grad", " Masters", " Preschool", " Prof-school", " Some-college"
), class = "factor"), educationNum = c(13L, 13L, 9L, 7L,
13L, 14L, 5L, 9L, 14L, 13L, 10L, 13L, 13L, 12L, 11L, 4L,
9L, 9L, 7L, 14L, 16L, 9L, 5L, 7L, 9L, 13L, 9L, 10L, 9L, 9L,
12L, 10L, 13L, 10L, 10L, 7L, 10L, 9L, 10L, 12L, 5L, 13L,
13L, 9L, 9L, 13L, 9L, 14L, 11L, 11L, 10L, 9L, 15L, 13L, 9L,
10L, 3L, 11L, 9L, 9L, 13L, 4L, 9L, 16L, 10L, 9L, 10L, 9L,
10L, 10L, 10L, 13L, 13L, 10L, 10L, 9L, 12L, 6L, 7L, 4L, 9L,
13L, 9L, 9L, 9L, 9L, 9L, 14L, 5L, 16L, 11L, 10L, 10L, 9L,
13L, 10L, 16L, 10L, 12L, 9L), marital = structure(c(5L, 3L,
1L, 3L, 3L, 3L, 4L, 3L, 5L, 3L, 3L, 3L, 5L, 5L, 3L, 3L, 5L,
5L, 3L, 1L, 3L, 6L, 3L, 3L, 1L, 3L, 5L, 3L, 1L, 3L, 5L, 5L,
1L, 3L, 3L, 5L, 5L, 2L, 3L, 3L, 3L, 3L, 3L, 6L, 5L, 3L, 3L,
1L, 3L, 5L, 3L, 5L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
3L, 3L, 1L, 3L, 1L, 3L, 3L, 5L, 5L, 6L, 3L, 5L, 3L, 5L, 3L,
3L, 5L, 3L, 5L, 3L, 3L, 3L, 1L, 1L, 3L, 3L, 5L, 5L, 3L, 1L,
1L, 3L, 3L, 5L, 3L, 3L, 1L, 5L), .Label = c(" Divorced",
" Married-AF-spouse", " Married-civ-spouse", " Married-spouse-absent",
" Never-married", " Separated", " Widowed"), class = "factor"),
occ = structure(c(2L, 5L, 7L, 7L, 11L, 5L, 9L, 5L, 11L, 5L,
5L, 11L, 2L, 13L, 4L, 15L, 6L, 8L, 13L, 5L, 11L, 9L, 6L,
15L, 14L, 14L, 4L, 1L, 5L, 4L, 12L, 13L, 5L, 2L, 9L, 8L,
8L, 2L, 13L, 11L, 8L, 11L, 14L, 2L, 7L, 11L, 8L, 5L, 4L,
11L, 5L, 9L, 11L, 5L, 5L, 14L, 8L, 9L, 2L, 8L, 13L, 1L, 15L,
11L, 14L, 4L, 2L, 2L, 5L, 1L, 11L, 13L, 13L, 8L, 11L, 9L,
2L, 1L, 9L, 6L, 13L, 9L, 9L, 13L, 4L, 13L, 12L, 11L, 13L,
11L, 11L, 4L, 8L, 13L, 12L, 7L, 11L, 13L, 5L, 9L), .Label = c(" ?",
" Adm-clerical", " Armed-Forces", " Craft-repair", " Exec-managerial",
" Farming-fishing", " Handlers-cleaners", " Machine-op-inspct",
" Other-service", " Priv-house-serv", " Prof-specialty",
" Protective-serv", " Sales", " Tech-support", " Transport-moving"
), class = "factor"), relationship = structure(c(2L, 1L,
2L, 1L, 6L, 6L, 2L, 1L, 2L, 1L, 1L, 1L, 4L, 2L, 1L, 1L, 4L,
5L, 1L, 5L, 1L, 5L, 1L, 1L, 5L, 1L, 4L, 1L, 2L, 1L, 2L, 4L,
4L, 4L, 1L, 5L, 4L, 6L, 1L, 1L, 1L, 1L, 1L, 5L, 2L, 1L, 1L,
5L, 1L, 2L, 6L, 4L, 6L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 2L, 1L, 2L, 6L, 1L, 4L, 4L, 4L, 1L, 2L, 3L, 4L, 1L,
1L, 4L, 1L, 2L, 1L, 6L, 1L, 2L, 4L, 1L, 1L, 2L, 2L, 1L, 5L,
5L, 6L, 1L, 2L, 1L, 1L, 5L, 4L), .Label = c(" Husband", " Not-in-family",
" Other-relative", " Own-child", " Unmarried", " Wife"), class = "factor"),
race = structure(c(5L, 5L, 5L, 3L, 3L, 5L, 3L, 5L, 5L, 5L,
3L, 2L, 5L, 3L, 2L, 1L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 5L, 5L,
5L, 5L, 2L, 5L, 5L, 5L, 3L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 4L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 2L, 5L, 5L, 5L, 5L, 5L, 3L
), .Label = c(" Amer-Indian-Eskimo", " Asian-Pac-Islander",
" Black", " Other", " White"), class = "factor"), sex = structure(c(2L,
2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L), .Label = c(" Female",
" Male"), class = "factor"), capGain = c(2174L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 14084L, 5178L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 5013L, 2407L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 14344L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), capLoss = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 2042L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1408L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1902L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1573L, 0L, 0L, 1902L, 0L, 0L, 0L), hours = c(40L,
13L, 40L, 40L, 40L, 40L, 16L, 45L, 50L, 40L, 80L, 40L, 30L,
50L, 40L, 45L, 35L, 40L, 50L, 45L, 60L, 20L, 40L, 40L, 40L,
40L, 40L, 60L, 80L, 40L, 52L, 44L, 40L, 40L, 15L, 40L, 40L,
25L, 38L, 40L, 43L, 40L, 50L, 40L, 35L, 40L, 38L, 40L, 40L,
43L, 40L, 30L, 60L, 55L, 60L, 40L, 40L, 40L, 48L, 40L, 40L,
40L, 40L, 45L, 58L, 40L, 40L, 40L, 50L, 40L, 32L, 40L, 70L,
40L, 20L, 40L, 40L, 2L, 22L, 40L, 30L, 40L, 40L, 48L, 40L,
35L, 40L, 50L, 40L, 50L, 40L, 40L, 25L, 35L, 40L, 50L, 60L,
48L, 40L, 40L), country = structure(c(40L, 40L, 40L, 40L,
6L, 40L, 24L, 40L, 40L, 40L, 40L, 20L, 40L, 40L, 1L, 27L,
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 36L,
40L, 40L, 40L, 40L, 40L, 40L, 40L, 34L, 40L, 40L, 1L, 40L,
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 1L,
17L, 40L, 40L, 40L, 27L, 34L, 40L, 40L, 40L, 1L, 40L, 40L,
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 27L,
40L, 40L, 40L, 40L, 40L, 6L, 40L, 40L, 40L, 40L, 40L, 40L,
40L, 40L, 40L, 40L, 40L, 1L, 40L, 40L, 40L, 40L, 10L, 40L
), .Label = c(" ?", " Cambodia", " Canada", " China", " Columbia",
" Cuba", " Dominican-Republic", " Ecuador", " El-Salvador",
" England", " France", " Germany", " Greece", " Guatemala",
" Haiti", " Holand-Netherlands", " Honduras", " Hong", " Hungary",
" India", " Iran", " Ireland", " Italy", " Jamaica", " Japan",
" Laos", " Mexico", " Nicaragua", " Outlying-US(Guam-USVI-etc)",
" Peru", " Philippines", " Poland", " Portugal", " Puerto-Rico",
" Scotland", " South", " Taiwan", " Thailand", " Trinadad&Tobago",
" United-States", " Vietnam", " Yugoslavia"), class = "factor")), .Names = c("age",
"workClass", "fnlwgt", "education", "educationNum", "marital",
"occ", "relationship", "race", "sex", "capGain", "capLoss", "hours",
"country"), row.names = c(NA, 100L), class = "data.frame")
答案 0 :(得分:1)
您提供的代码构造了一个只含 1 个水平(<=50K)的因子,因为第一个向量输入仅包含 1L。您应该相应地分配这些标签,或者使用更简单的方法来构建您的响应变量,例如 trainY <- as.factor(...)。
我改变了 trainY 的构建方式:
y <- structure(c(1L, 2L, 1L, 1L, 2L, 1L), .Label = c(" <=50K", " >50K"), class = "factor")
并且在使用相同的命令重新训练树之后:
Decision tree:
V14 = Cuba: >50K (1)
V14 in {?,Cambodia,Canada,China,Columbia,Dominican-Republic,Ecuador,
El-Salvador,England,France,Germany,Greece,Guatemala,Haiti,
Holand-Netherlands,Honduras,Hong,Hungary,India,Iran,Ireland,Italy,
Jamaica,Japan,Laos,Mexico,Nicaragua,Outlying-US(Guam-USVI-etc),Peru,
Philippines,Poland,Portugal,Puerto-Rico,Scotland,South,Taiwan,Thailand,
Trinadad&Tobago,United-States,Vietnam,Yugoslavia}: <=50K (5/1)
将参数传递给 C5.0 时,请确保响应变量中不是只有一个类别。希望这有所帮助(HTH)。
**更新**
在绘制了一些预测变量与响应变量的关系图之后,我注意到 education 和 educationNum 显示了数据中最清晰的分割(Doctorate 立即意味着 >50K)。下一步是调整一些非常有用的 C5.0Control 选项——它们在 C5.0 的包文档(package documentation)和官方的非正式教程页面(tutorial page)中有详细记录——查阅它们能帮助你更全面地控制分类过程。
例如:
C5.0(x = trainX,y = trainY,control = C5.0Control(subset = T, winnow = T,minCases = 4,fuzzyThreshold = T))
Decision tree:
educationNum <= 13 (14.5): <=50K (95/20)
educationNum >= 16 (14.5): >50K (5)
类似地,做一些“特征工程”,在这种情况下意味着只留下原始数据框中的一些列:
C5.0(x = trainX[ ,c(1:5, 9:13)], y = trainY)
Decision tree:
educationNum <= 14: <=50K (95/20)
educationNum > 14: >50K (5)
我认为不存在一种通用的“开箱即用”的 C5.0 默认设置,能够为各种问题都产生令人满意的结果,所以归根结底还是要尝试不同的参数设置、特征等……不过和 R 的所有事情一样,有大量资料可以为你指明方向。