# Forward stepwise (BIC) logistic regression for the spam data set.
#
# Reads spam.csv, assigns column names, then fits:
#   null - intercept-only logistic model
#   full - all main effects plus all pairwise interactions
#   fwd  - forward selection from `null` over the main effects, using the
#          BIC penalty k = log(n)
spam <- read.csv("spam.csv")

spam_col_names <- c(
  "w_make", "w_address", "w_all", "w_3d", "w_our", "w_over", "w_remove",
  "w_internet", "w_order", "w_mail", "w_receive", "w_will", "w_people",
  "w_report", "w_addresses", "w_free", "w_business", "w_email", "w_you",
  "w_credit", "w_your", "w_font", "w_000", "w_money", "w_hp", "w_hpl",
  "w_george", "w_650", "w_lab", "w_labs", "w_telnet", "w_857", "w_data",
  "w_415", "w_85", "w_technology", "w_1999", "w_parts", "w_pm", "w_direct",
  "w_cs", "w_meeting", "w_original", "w_project", "w_re", "w_edu", "w_table",
  "w_conference", "c_semicolon", "c_roundparen", "c_squareparen",
  "c_exclaim", "c_dollar", "c_hash", "caps_avg", "caps_long", "caps_total",
  "spam"
)
# Fail early if the file does not have exactly one column per name.
stopifnot(ncol(spam) == length(spam_col_names))
names(spam) <- spam_col_names

yspam <- spam$spam
nspam <- nrow(spam)

# Every column except the outcome. The outcome must never appear on the
# right-hand side: with an external response (`yspam ~ .`) and `data = spam`,
# `.` expands to ALL columns of `spam`, including `spam` itself, so the
# outcome predicts itself perfectly and glm.fit reports non-convergence and
# fitted probabilities of 0 or 1.
spam_predictors <- setdiff(names(spam), "spam")

# Using the `spam` column as the response makes `.` exclude it automatically.
null <- glm(spam ~ 1, family = binomial(link = logit), data = spam)

# `.^2` expands to all main effects plus all pairwise interactions (same
# terms as `. + .^2`).
# NOTE(review): this is a very large model for this data; it is not used by
# the stepwise search below and may still emit separation warnings of its
# own. Confirm it is actually needed.
full <- glm(spam ~ .^2, family = binomial(link = logit), data = spam)

# `step()` runs `scope` through update.formula(), where `.` means "the
# current model's right-hand side" rather than "all columns", so the
# candidate terms are spelled out explicitly here.
fwd_scope <- reformulate(spam_predictors, response = "spam")
fwd <- step(null, scope = fwd_scope, direction = "forward", k = log(nspam))
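我正在尝试基于一个包含 58 个变量的数据集(其中一个变量标记电子邮件是否为垃圾邮件)建立 glm 模型,以预测电子邮件是否为垃圾邮件。我尝试使用 R 中的 step() 函数(逐步回归)来选出 BIC 值最低的模型,但不断收到以下消息:“glm.fit: algorithm did not converge”(算法未收敛)以及“glm.fit: fitted probabilities numerically 0 or 1 occurred”(出现了数值上为 0 或 1 的拟合概率)。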
有什么建议吗?