所以我运行这个循环:(我的数据文件非常大,.csv)
for (i in 1:365){
race.date<-as.Date(meeting_date,"%d/%m/%Y")
indicator1<-race.date<=as.Date("31/12/2007","%d/%m/%Y")+i
model.data<-horse.data[indicator1==TRUE,]
indicator2<-race.date==as.Date("31/12/2007","%d/%m/%Y")+i+1
test.data<-horse.data[indicator2==TRUE,]
##############################
include.data<-ifelse(model.data$gender=="",0,1)*
ifelse(model.data$race_id==187262,0,1)*
ifelse(model.data$race_id==184238,0,1)*
ifelse(model.data$race_id==179250,0,1)*
ifelse(model.data$starting_price_decimal=="NA",0,1)*
ifelse(model.data$days_since_ran<=100,1,0)*
ifelse(model.data$age>=0&model.data$age<=20,1,0)
include.data<-ifelse(is.na(include.data),0,include.data)
#ifelse(model.data$speed_rating<=120,1,0)
#ifelse(model.data$official_rating>=0,1,0)
###############################
include.test.data<-ifelse(test.data$gender=="",0,1)*
ifelse(test.data$race_id==187262,0,1)*
ifelse(test.data$race_id==184238,0,1)*
ifelse(test.data$race_id==179250,0,1)*
ifelse(test.data$starting_price_decimal=="NA",0,1)*
ifelse(test.data$days_since_ran<=100,1,0)*
ifelse(test.data$age>=0&test.data$age<=20,1,0)
include.test.data<-ifelse(is.na(include.test.data),0,include.test.data)
test.data.reduced<-test.data[include.test.data==1,]
test.data.reduced$stall_number.f<-as.factor(test.data.reduced$stall_number)
test.data.reduced$age.f<-as.factor(test.data.reduced$age)
test.data.reduced$win.f<-as.factor(test.data.reduced$win)
#########################
model.data.reduced<-model.data[include.data==1,]
model.data.reduced$stall_number.f<-as.factor(model.data.reduced$stall_number)
model.data.reduced$age.f<-as.factor(model.data.reduced$age)
model.data.reduced$win.f<-as.factor(model.data.reduced$win)
model<-glm(win.f~days_since_ran+gender+official_rating+stall_number.f+age.f,data=model.data.reduced,family=binomial)
#########################
#Do predictions
model.prob<-predict(model,newdata=test.data.reduced,type="response")
bm.prob<-test.data.reduced$bm_win_prob
max(bm_win_prob)
c<-1.3
bet<-ifelse(model.prob>c*bm.prob,1,0)
#############Only bet on one horse per race, highest win.prob out of them?
profit<-ifelse(win==1,starting_price_decimal-1,-1)
profit<-ifelse(bet==1,profit,0)
profit<-ifelse(is.na(profit),0,profit)
}
它给了我这个错误:
Error in family(object)$linkinv(pred) :
Argument eta must be a nonempty numeric vector
我不知道在哪里开始修复它。我认为这与我在每次循环后尝试将利润应用于向量有关,但我摆脱了它并且它仍然发生...
非常感谢任何帮助。
答案 0 :(得分:1)
在 for循环的某个时刻,glm()
未向变量model
返回任何内容。
将此空model
传递给预测时,R
正在生成此错误。此问题的根源可能是因为glm()
无法使模型适合model.data.reduced
。您应该检查传递给model.data.reduced
的{{1}}的大小。
在使用glm()
创建模型并将结果传递给glm()
内部时,我遇到了同样的问题。