我试图在Weka中使用J48分类器,但它将所有内容归类为0。
这是我的数据集（ARFF 格式）：
@relation 'SimpleRules-weka.filters.unsupervised.attribute.Reorder-R1,2,3,4,5,7,8,9,10,11,12,13,14,15,16,17,18,19,6-weka.filters.unsupervised.attribute.Remove-R1-weka.filters.unsupervised.attribute.Remove-R1-weka.filters.unsupervised.attribute.Remove-R3-weka.filters.unsupervised.attribute.Remove-R1-2'
@attribute R1 numeric
@attribute R2 numeric
@attribute R3 numeric
@attribute R4 numeric
@attribute R5 numeric
@attribute R6 numeric
@attribute R7 numeric
@attribute R8 numeric
@attribute R9 numeric
@attribute Rank numeric
@attribute R1R5R6 numeric
@attribute R1R6R7 numeric
@attribute CombinedRules numeric
@attribute Demoday {0,1}
@data
1,1,1,1,1,0,1,1,0,11,12,0,0,0
1,1,1,1,0,1,0,1,0,72,1,0,0,0
0,0,0,1,0,1,1,1,0,47,7,0,0,1
1,1,0,1,1,1,0,1,1,68,12,1,0,0
1,1,1,1,1,1,0,1,1,21,7,1,0,0
1,1,1,1,0,1,1,1,1,63,11,0,1,0
1,1,0,1,0,1,1,1,0,19,7,0,1,0
1,0,1,1,0,0,1,1,1,11,7,0,0,0
0,1,1,1,0,1,0,1,0,107,12,0,0,0
1,1,1,1,0,1,0,1,0,99,12,0,0,1
0,1,1,1,0,1,1,1,1,238,2,0,0,0
1,1,1,1,1,0,1,1,0,147,7,0,0,0
1,1,1,1,1,1,0,1,1,30,7,1,0,1
1,1,1,1,0,0,0,1,1,124,5,0,0,0
0,1,1,1,1,1,0,1,1,54,5,0,0,0
0,0,0,1,0,1,0,1,0,153,5,0,0,0
1,1,1,1,0,1,0,1,1,33,5,0,0,0
1,1,1,1,1,1,0,1,0,143,3,1,0,0
1,0,1,1,0,1,1,1,0,28,3,0,1,0
0,1,1,1,0,1,1,0,0,83,8,0,0,0
1,1,1,1,1,1,1,1,0,31,7,1,1,0
1,1,1,1,0,0,0,1,0,91,12,0,0,0
1,1,1,1,0,0,1,1,0,7,7,0,0,0
1,1,1,1,0,1,0,1,1,4,1,0,0,0
1,1,0,1,1,1,0,1,0,41,1,1,0,0
0,1,1,1,0,1,0,1,1,84,5,0,0,0
1,1,0,1,0,1,1,1,0,81,1,0,1,1
0,1,1,1,1,1,0,1,1,8,6,0,0,0
1,1,1,1,0,1,1,1,1,172,11,0,1,0
1,1,1,1,1,0,0,1,1,142,12,0,0,1
0,1,1,1,0,1,1,1,1,35,11,0,0,0
1,1,1,1,0,1,0,1,1,130,11,0,0,0
1,1,1,1,0,1,1,1,1,62,7,0,1,0
0,1,1,1,0,1,1,1,1,34,7,0,0,0
0,1,1,1,0,1,1,1,1,108,3,0,0,0
0,1,1,1,0,1,0,1,1,11,12,0,0,0
0,1,1,1,1,0,0,1,1,129,3,0,0,0
1,1,0,1,0,1,1,1,1,24,10,0,1,1
1,1,1,1,0,1,1,1,0,50,8,0,1,0
1,1,1,1,1,1,0,1,1,12,12,1,0,1
0,1,1,1,1,1,1,1,0,111,3,0,0,0
1,1,0,1,0,1,0,1,1,55,11,0,0,0
1,1,1,1,0,1,0,1,1,239,11,0,0,0
0,1,1,1,1,1,0,1,0,131,2,0,0,0
1,1,1,1,0,1,0,1,1,328,8,0,0,0
1,1,1,1,0,1,1,1,1,12,12,0,1,1
1,1,1,1,0,1,0,1,1,113,8,0,0,0
0,1,1,1,0,1,0,1,0,96,1,0,0,0
1,1,1,1,0,0,0,1,1,75,7,0,0,0
1,1,1,1,1,1,0,1,1,67,1,1,0,1
1,1,1,1,1,1,0,1,0,112,11,1,0,0
1,1,1,1,0,0,1,1,1,109,3,0,0,0
1,0,1,1,1,0,0,1,0,47,12,0,0,0
1,1,1,1,0,1,0,1,1,47,7,0,0,0
1,1,1,1,0,1,0,1,1,2,6,0,0,0
0,0,0,1,0,1,1,1,0,16,2,0,0,0
1,1,1,1,0,1,0,1,0,18,12,0,0,0
0,1,1,1,1,1,1,1,0,58,3,0,0,0
0,0,0,1,1,1,0,1,0,156,7,0,0,0
1,1,1,1,1,0,1,1,0,279,2,0,0,0
1,1,1,1,0,1,0,1,0,2,12,0,0,0
0,0,1,1,0,1,0,1,1,163,6,0,0,0
1,1,1,1,1,1,0,1,1,10,3,1,0,1
0,0,1,1,1,1,0,1,0,3,12,0,0,0
1,1,1,1,1,1,0,1,1,101,7,1,0,0
1,1,1,1,0,1,0,1,0,136,9,0,0,0
0,1,1,1,1,0,0,1,0,31,8,0,0,0
1,0,1,1,1,1,0,1,0,155,8,1,0,0
0,1,1,1,0,1,1,1,0,158,12,0,0,0
0,1,0,1,0,1,0,1,0,101,1,0,0,0
0,1,0,1,0,1,0,1,1,7,7,0,0,0
1,0,0,1,1,1,0,1,0,23,1,1,0,0
1,0,0,1,1,0,0,1,1,99,1,0,0,0
1,1,1,1,1,1,0,1,1,73,3,1,0,0
1,1,1,1,1,1,0,1,0,15,3,1,0,0
1,1,1,1,0,1,1,1,0,97,8,0,1,0
1,1,1,1,0,1,1,1,1,93,8,0,1,0
1,1,1,1,1,1,1,1,0,44,7,1,1,1
0,1,1,1,0,1,0,1,0,239,7,0,0,0
0,0,0,1,1,1,0,1,1,35,1,0,0,0
0,1,1,1,0,1,0,1,0,90,12,0,0,0
1,1,1,1,1,1,0,1,1,37,7,1,0,0
1,1,1,1,1,0,0,1,1,25,12,0,0,1
1,1,1,1,0,0,0,1,0,83,2,0,0,0
1,1,1,1,1,1,1,1,1,22,10,1,1,1
1,1,1,1,1,0,1,1,1,2,10,0,0,0
1,0,1,1,0,1,1,1,1,65,5,0,1,0
0,1,1,1,0,1,1,1,1,25,3,0,0,0
1,0,1,1,0,0,1,1,0,180,8,0,0,0
0,1,0,1,0,1,1,1,1,49,10,0,0,0
0,0,1,1,0,1,0,1,0,67,8,0,0,0
1,1,1,1,1,0,1,1,0,14,11,0,0,0
1,0,0,1,1,1,0,1,0,36,11,1,0,0
0,0,0,1,0,0,1,1,1,97,9,0,0,0
0,0,0,1,0,1,1,1,0,193,1,0,0,0
0,0,1,1,1,1,1,1,1,83,6,0,0,0
0,1,1,1,0,1,0,1,1,13,12,0,0,0
1,1,1,1,0,1,0,1,0,49,5,0,0,0
1,0,1,1,1,1,1,1,1,1,8,1,1,1
1,0,1,1,0,1,0,1,1,159,10,0,0,0
1,1,1,1,1,1,1,1,0,51,7,1,1,0
1,1,1,1,1,1,0,1,1,168,6,1,0,0
0,1,1,1,0,1,0,1,1,100,5,0,0,0
0,0,0,1,0,0,1,1,0,30,3,0,0,0
1,1,0,1,0,1,1,1,0,27,12,0,1,0
1,1,1,1,0,1,0,1,1,34,11,0,0,0
0,1,0,1,0,1,1,1,1,101,3,0,0,0
1,0,1,1,0,1,0,1,1,111,11,0,0,0
1,1,1,1,1,1,0,1,0,51,2,1,0,0
1,1,1,1,0,0,0,1,0,233,12,0,0,0
1,1,1,1,1,0,0,1,1,98,11,0,0,0
0,1,1,1,0,1,0,1,0,24,1,0,0,0
1,1,1,1,0,0,1,1,1,181,2,0,0,0
1,1,1,1,1,1,0,1,1,14,6,1,0,0
0,1,1,1,1,1,1,1,1,96,1,0,0,0
1,1,1,1,0,1,1,1,1,139,12,0,1,1
1,1,1,1,1,1,1,1,1,155,8,1,1,0
1,1,1,1,1,1,0,1,0,53,7,1,0,1
0,1,1,1,0,1,1,1,0,17,8,0,0,0
1,1,1,1,0,1,1,1,0,39,6,0,1,0
0,0,1,1,0,1,0,1,0,282,12,0,0,0
1,0,1,1,1,0,0,1,1,132,7,0,0,0
1,1,1,1,0,0,0,1,0,57,11,0,0,0
1,0,0,1,0,1,1,1,1,165,7,0,1,1
0,1,0,1,0,1,1,1,1,74,10,0,0,0
0,1,1,1,0,1,1,1,0,150,7,0,0,0
1,0,1,1,1,1,0,1,1,53,2,1,0,0
1,1,1,1,1,1,0,1,1,42,12,1,0,1
1,1,1,1,1,0,1,1,1,234,7,0,0,0
1,1,1,1,0,0,1,1,1,164,10,0,0,0
1,1,1,1,0,0,0,1,0,69,3,0,0,0
1,1,1,1,0,0,1,1,0,38,5,0,0,0
1,0,0,1,0,1,0,1,1,56,7,0,0,0
1,1,0,1,0,1,0,1,1,63,1,0,0,0
1,1,1,1,1,1,1,1,0,9,1,1,1,0
1,0,1,1,0,1,0,1,1,23,11,0,0,0
1,1,1,1,1,0,0,1,0,46,7,0,0,0
1,1,1,1,0,0,0,1,1,59,12,0,0,0
1,1,0,1,0,1,0,1,1,27,1,0,0,0
0,1,1,1,1,1,0,1,1,4,12,0,0,0
1,1,0,1,0,1,0,1,0,132,12,0,0,0
1,1,0,1,1,1,1,1,1,78,5,1,1,0
1,1,1,1,0,1,1,1,1,32,12,0,1,0
0,1,1,1,1,1,0,1,0,104,7,0,0,0
1,1,1,1,0,1,1,1,0,117,12,0,1,0
0,1,0,1,0,1,0,1,1,185,7,0,0,0
1,1,0,1,0,1,1,1,0,38,4,0,1,0
1,1,0,1,1,0,1,1,1,8,12,0,0,0
0,1,1,1,1,1,1,1,0,80,4,0,0,1
1,0,0,1,1,0,1,1,0,12,11,0,0,0
0,0,1,1,0,1,1,1,0,70,12,0,0,0
1,1,1,1,1,0,0,1,1,76,3,0,0,0
0,1,1,1,0,1,1,1,0,23,11,0,0,0
1,1,0,1,1,1,0,1,0,40,7,1,0,0
1,1,1,1,0,0,1,1,1,159,12,0,0,0
1,1,1,1,0,1,0,1,0,49,12,0,0,1
0,0,1,1,0,1,1,1,1,37,7,0,0,1
1,1,0,1,0,1,1,1,1,147,9,0,1,0
1,1,1,1,0,0,0,1,0,87,3,0,0,0
1,1,1,1,1,1,0,1,0,7,1,1,0,0
0,0,1,1,0,1,1,1,0,167,3,0,0,0
0,1,1,1,0,1,1,1,0,6,3,0,0,0
0,1,1,1,1,1,0,1,0,39,7,0,0,0
1,1,1,1,1,0,0,1,1,88,11,0,0,0
0,0,1,1,1,1,1,1,0,175,12,0,0,0
1,1,1,0,0,1,0,1,0,127,12,0,0,0
1,1,1,1,0,1,1,1,0,1,11,0,1,0
1,1,1,1,0,0,0,1,1,77,7,0,0,0
1,1,1,1,1,0,0,1,1,122,5,0,0,0
1,0,1,1,0,1,1,1,0,155,8,0,1,1
1,1,0,0,0,1,1,1,1,114,4,0,1,0
0,1,1,1,1,0,0,1,0,106,7,0,0,1
1,1,1,1,1,1,1,1,0,16,7,1,1,1
1,0,0,1,0,1,0,1,0,176,6,0,0,0
1,0,1,1,0,0,1,1,1,47,2,0,0,0
0,0,0,1,0,1,0,1,0,95,6,0,0,0
1,1,1,1,0,1,1,1,0,233,11,0,1,0
1,1,1,1,0,1,1,1,0,27,1,0,1,0
1,1,1,1,0,1,0,1,1,85,8,0,0,1
0,0,0,0,0,1,0,1,1,58,3,0,0,0
1,0,1,1,1,1,0,1,1,102,11,1,0,0
1,1,1,1,1,0,0,1,1,33,12,0,0,0
0,1,1,1,0,1,0,1,0,92,12,0,0,0
1,0,1,1,1,1,0,1,0,20,5,1,0,0
1,1,1,1,1,1,1,1,1,8,8,1,1,1
1,1,1,1,1,1,1,1,1,3,12,1,1,0
1,1,0,1,0,1,1,1,0,16,12,0,1,0
1,1,1,1,0,1,1,1,0,143,12,0,1,0
1,1,0,1,0,1,0,1,1,84,3,0,0,0
1,1,1,1,0,1,1,1,1,149,7,0,1,0
1,1,1,1,0,0,1,1,0,14,3,0,0,0
1,0,1,1,0,1,1,1,0,37,9,0,1,0
0,1,1,1,0,0,0,1,1,137,1,0,0,0
1,0,1,1,0,1,0,1,1,121,1,0,0,0
1,0,0,1,0,1,1,1,1,21,3,0,1,0
1,1,1,1,1,0,1,1,1,23,5,0,0,0
1,0,1,1,0,1,1,1,0,40,11,0,1,0
1,1,1,1,0,1,1,1,1,82,6,0,1,1
1,1,1,1,0,0,1,1,1,106,12,0,0,0
0,0,1,1,1,0,0,1,1,62,7,0,0,0
1,1,1,1,0,1,0,1,0,90,1,0,0,0
1,1,1,1,0,1,1,1,0,26,12,0,1,1
0,1,1,1,0,1,1,1,0,49,11,0,0,0
0,1,1,1,0,1,0,1,1,67,7,0,0,0
1,1,1,1,0,0,1,1,1,120,3,0,0,0
1,1,1,1,1,1,1,1,0,92,1,1,1,0
1,1,0,1,1,1,0,1,0,22,5,1,0,1
1,1,1,1,0,0,1,1,0,130,1,0,0,0
1,1,1,1,0,1,1,1,1,135,3,0,1,0
1,1,0,1,0,1,0,1,1,94,6,0,0,0
0,1,1,1,1,0,0,1,0,63,3,0,0,0
1,1,1,1,0,1,1,1,1,40,3,0,1,0
1,1,1,1,0,1,0,1,1,512,12,0,0,0
1,1,0,1,0,1,0,1,1,60,10,0,0,1
0,0,0,1,0,0,1,1,0,154,11,0,0,0
1,1,1,1,1,0,1,1,0,117,3,0,0,1
1,1,1,1,1,1,0,1,1,198,3,1,0,0
1,0,1,1,1,1,1,1,0,51,2,1,1,0
1,0,0,1,1,0,1,1,1,53,1,0,0,0
1,1,0,1,0,1,1,1,0,115,12,0,1,0
1,1,1,1,1,0,0,1,1,86,1,0,0,0
1,1,1,1,1,1,1,1,0,65,5,1,1,0
0,1,1,1,1,1,1,1,1,51,6,0,0,0
1,1,1,1,0,0,0,1,0,41,2,0,0,0
1,1,1,1,0,1,1,1,1,104,3,0,1,0
0,1,1,1,1,0,0,1,0,44,9,0,0,0
1,0,0,1,1,0,0,1,1,145,2,0,0,0
1,1,1,1,0,1,1,1,0,199,10,0,1,1
1,1,0,1,0,1,1,1,1,3,3,0,1,0
1,1,1,1,0,0,0,1,0,10,5,0,0,0
1,1,1,1,1,1,0,1,1,81,7,1,0,0
1,1,0,0,0,1,0,1,0,164,6,0,0,0
0,1,1,1,0,1,1,1,1,122,5,0,0,0
1,1,1,1,1,1,0,1,1,188,3,1,0,0
1,1,1,1,0,0,0,1,1,149,5,0,0,0
1,1,1,1,0,0,0,1,1,152,12,0,0,0
1,1,1,1,0,1,1,1,1,5,10,0,1,0
1,0,1,1,1,0,0,1,0,35,5,0,0,0
1,1,1,1,0,0,1,1,1,12,4,0,0,0
以下是使用10折交叉验证运行J48算法后的结果：
Correctly Classified Instances 205 85.7741 %
Incorrectly Classified Instances 34 14.2259 %
Kappa statistic 0.0266
Mean absolute error 0.2346
Root mean squared error 0.3465
Relative absolute error 100.0672 %
Root relative squared error 101.7226 %
Coverage of cases (0.95 level) 99.5816 %
Mean rel. region size (0.95 level) 99.3724 %
Total Number of Instances 239
=== Detailed Accuracy By Class ===
TP Rate FP Rate Precision Recall F-Measure MCC ROC Area PRC Area Class
0,986 0,969 0,868 0,986 0,923 0,044 0,492 0,865 0
0,031 0,014 0,250 0,031 0,056 0,044 0,492 0,135 1
Weighted Avg. 0,858 0,841 0,785 0,858 0,807 0,044 0,492 0,767
=== Confusion Matrix ===
a b <-- classified as
204 3 | a = 0
31 1 | b = 1
它生成的树https://www.dropbox.com/s/qzjukr8klffwl90/Captura%20de%20pantalla%202014-04-15%2022.33.38.png
我希望你能帮助我解决这个问题,
非常感谢,
答案 0（得分：1）：
这是经典的类别不平衡（class imbalance）问题。看看你的类别分布：Demoday = 1 有 32 条记录，Demoday = 0 有 207 条记录。几乎所有机器学习算法的目标都是最大化整体准确率。因此在你的情况下，分类器只要把每个实例都预测为 0，就能获得约 85.7% 的准确率——这正是你得到的那棵只有单个叶节点的树。问题当然在于：少数类通常才是我们更感兴趣的。你可以搜索 "class imbalance"（类别不平衡）了解更多信息。我已在另一个回答中给出了一些针对此类问题的快速解决方案，以及更适合评估此类模型性能的指标：how to edit weka configurations to find "1"
祝你好运