我在 R 中使用朴素贝叶斯(naiveBayes)。我有泰坦尼克号数据集,但 predict() 函数报错

时间:2017-04-16 17:00:36

标签: r naivebayes

library(e1071)
m1 <- naiveBayes(Survived ~ ., data =tr) # tr is the training set
Prediction1<-predict(m1,tst)  #tst is the test set
Error in `[.default`(object$tables[[v]], , nd +    islogical[attribs[v]]) : 
subscript out of bounds
In addition: Warning messages:
1: In data.matrix(newdata) : NAs introduced by coercion
2: In data.matrix(newdata) : NAs introduced by coercion
3: In data.matrix(newdata) : NAs introduced by coercion
4: In data.matrix(newdata) : NAs introduced by coercion
5: In data.matrix(newdata) : NAs introduced by coercion
6: In data.matrix(newdata) : NAs introduced by coercion
7: In data.matrix(newdata) : NAs introduced by coercion
8: In data.matrix(newdata) : NAs introduced by coercion
9: In data.matrix(newdata) : NAs introduced by coercion

tr 是包含 17 列、891 行的训练集。其中一列是 Survived,取值为 0 或 1,表示该泰坦尼克号乘客是否幸存。tst 是测试集,具有相同的 17 列,共 418 行,其中 Survived 列全部为 NA。之所以为 NA,是因为这一列正是需要预测的目标,预测之后再把结果提交到 kaggle.com 进行比较。tst 和 tr 都是 data.frame。这里的错误出在哪里?我阅读了 naiveBayes 的手册,也尝试过把数据转换为因子(factor),但没有任何效果。提前致谢。

这是tr:https://www.dropbox.com/s/riklgjabppqa0om/tr.png?dl=0

这是tst:https://www.dropbox.com/s/9juvs6g630181tg/tst.png?dl=0

dput(head(tr,20))
structure(list(PassengerId = 1:20, Survived = c(0L, 1L, 1L, 1L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L
), Pclass = c(3L, 1L, 3L, 1L, 3L, 3L, 1L, 3L, 3L, 2L, 3L, 1L,
3L, 3L, 3L, 2L, 3L, 2L, 3L, 3L), Name = c("Braund, Mr. Owen Harris",
"Cumings, Mrs. John Bradley (Florence Briggs Thayer)", "Heikkinen, Miss. Laina",
"Futrelle, Mrs. Jacques Heath (Lily May Peel)", "Allen, Mr. William Henry",
"Moran, Mr. James", "McCarthy, Mr. Timothy J", "Palsson, Master. Gosta Leonard",
"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)", "Nasser, Mrs. Nicholas (Adele Achem)",
"Sandstrom, Miss. Marguerite Rut", "Bonnell, Miss. Elizabeth",
"Saundercock, Mr. William Henry", "Andersson, Mr. Anders Johan",
"Vestrom, Miss. Hulda Amanda Adolfina", "Hewlett, Mrs. (Mary D Kingcome)",
"Rice, Master. Eugene", "Williams, Mr. Charles Eugene", "Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",
"Masselmani, Mrs. Fatima"), Sex = c("male", "female", "female",
"female", "male", "male", "male", "male", "female", "female",
"female", "female", "male", "male", "female", "female", "male",
"male", "female", "female"), Age = c(22, 38, 26, 35, 35, NA,
54, 2, 27, 14, 4, 58, 20, 39, 14, 55, 2, NA, 31, NA), SibSp = c(1L,
1L, 0L, 1L, 0L, 0L, 0L, 3L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 4L,
0L, 1L, 0L), Parch = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 2L, 0L,
1L, 0L, 0L, 5L, 0L, 0L, 1L, 0L, 0L, 0L), Ticket = c("A/5 21171",
"PC 17599", "STON/O2. 3101282", "113803", "373450", "330877",
"17463", "349909", "347742", "237736", "PP 9549", "113783", "A/5. 2151",
"347082", "350406", "248706", "382652", "244373", "345763", "2649"
), Fare = c(7.25, 71.2833, 7.925, 53.1, 8.05, 8.4583, 51.8625,
21.075, 11.1333, 30.0708, 16.7, 26.55, 8.05, 31.275, 7.8542,
16, 29.125, 13, 18, 7.225), Cabin = c(NA, "C85", NA, "C123",
NA, NA, "E46", NA, NA, NA, "G6", "C103", NA, NA, NA, NA, NA,
NA, NA, NA), Embarked = c("S", "C", "S", "S", "S", "Q", "S",
"S", "S", "C", "S", "S", "S", "S", "S", "S", "Q", "S", "S", "C"
)), .Names = c("PassengerId", "Survived", "Pclass", "Name", "Sex",
"Age", "SibSp", "Parch", "Ticket", "Fare", "Cabin", "Embarked"
), class = c("data.table", "data.frame"), row.names = c(NA, -20L
), .internal.selfref = <pointer>)

dput(head(tst,20))
structure(list(PassengerId = 892:911, Pclass = c(3L, 3L, 2L,
3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 1L, 1L, 2L, 1L, 2L, 2L, 3L, 3L,
3L), Name = c("Kelly, Mr. James", "Wilkes, Mrs. James (Ellen Needs)",
"Myles, Mr. Thomas Francis", "Wirz, Mr. Albert", "Hirvonen, Mrs. Alexander (Helga E Lindqvist)",
"Svensson, Mr. Johan Cervin", "Connolly, Miss. Kate", "Caldwell, Mr. Albert Francis",
"Abrahim, Mrs. Joseph (Sophie Halaut Easu)", "Davies, Mr. John Samuel",
"Ilieff, Mr. Ylio", "Jones, Mr. Charles Cresson", "Snyder, Mrs. John Pillsbury (Nelle Stevenson)",
"Howard, Mr. Benjamin", "Chaffee, Mrs. Herbert Fuller (Carrie Constance Toogood)",
"del Carlo, Mrs. Sebastiano (Argenia Genovesi)", "Keane, Mr. Daniel",
"Assaf, Mr. Gerios", "Ilmakangas, Miss. Ida Livija", "Assaf Khalil, Mrs. Mariana (Miriam\")\""
), Sex = c("male", "female", "male", "male", "female", "male",
"female", "male", "female", "male", "male", "male", "female",
"male", "female", "female", "male", "male", "female", "female"
), Age = c(34.5, 47, 62, 27, 22, 14, 30, 26, 18, 21, 32.1505376344086,
46, 23, 63, 47, 24, 35, 21, 27, 45), SibSp = c(0L, 1L, 0L, 0L,
1L, 0L, 0L, 1L, 0L, 2L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L
), Parch = c(0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Ticket = c("330911", "363272",
"240276", "315154", "3101298", "7538", "330972", "248738", "2657",
"A/4 48871", "349220", "694", "21228", "24065", "W.E.P. 5734",
"SC/PARIS 2167", "233734", "2692", "STON/O2. 3101270", "2696"
), Fare = c(7.8292, 7, 7.6875, 8.6625, 12.2875, 9.225, 7.6292,
29, 7.2292, 24.15, 7.8958, 26, 82.2667, 26, 61.175, 27.7208,
12.35, 7.225, 7.925, 7.225), Cabin = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, "B45", NA, "E31", NA, NA, NA, NA, NA),
    Embarked = c("Q", "S", "Q", "S", "S", "S", "Q", "S", "C",
    "S", "S", "S", "S", "S", "S", "C", "Q", "C", "S", "C"), Survived = c(NA,
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
    NA, NA, NA, NA), Title = c("Mr", "Mrs", "Mr", "Mr", "Mrs",
    "Mr", "Miss", "Mr", "Mrs", "Mr", "Mr", "Mr", "Mrs", "Mr",
    "Mrs", "Mrs", "Mr", "Mr", "Miss", "Mrs"), TotalFamily = c(1,
    2, 1, 1, 3, 1, 1, 3, 1, 3, 1, 1, 2, 2, 2, 2, 1, 1, 2, 1),
    FamSize = c("Small", "Small", "Small", "Small", "Medium",
    "Small", "Small", "Medium", "Small", "Medium", "Small", "Small",
    "Small", "Small", "Small", "Small", "Small", "Small", "Small",
    "Small"), FarePrice = c("Low", "Low", "Low", "Low", "Medium",
    "Low", "Low", "High", "Low", "High", "Low", "High", "Expensive",
    "High", "Expensive", "High", "Medium", "Low", "Low", "Low"
    ), AgeNew = c("Adult", "Old", "Old", "Adult", "Adult", "Child",
    "Adult", "Adult", "Adult", "Adult", "Adult", "Old", "Adult",
    "Old", "Old", "Adult", "Adult", "Adult", "Adult", "Old")), .Names = c("PassengerId",
"Pclass", "Name", "Sex", "Age", "SibSp", "Parch", "Ticket", "Fare",
"Cabin", "Embarked", "Survived", "Title", "TotalFamily", "FamSize",
"FarePrice", "AgeNew"), class = c("data.table", "data.frame"), row.names = c(NA,
-20L), .internal.selfref = <pointer>)

1 个答案:

答案 0 :(得分:0)

在使用朴素贝叶斯并计算混淆矩阵时,我遇到过类似的问题。在我的情况下,结果变量的类型是字符型(character)。我把它转换为因子(factor)之后,一切似乎就正常了。字符型字段与因子并不相同。请尝试下面的做法(注意要把转换结果赋值回数据框中对应的列,例如 tr$Survived <- as.factor(tr$Survived),否则转换不会生效):

as.factor(Survived)