空的数字向量?

时间:2014-04-15 17:38:54

标签: r

我正在运行下面这个循环(我的数据文件是一个非常大的.csv文件):

# Walk-forward evaluation: for each day offset i (1..365) counted from
# 31/12/2007, fit a logistic regression on all rows dated on or before
# that day, then score the rows dated exactly one day later.
# NOTE(review): `meeting_date`, `win`, `starting_price_decimal` and
# `bm_win_prob` are referenced below as bare names rather than as columns
# of a data frame -- presumably horse.data was attach()ed; verify.
for (i in 1:365){

# Parse the meeting dates (day/month/year). This does not depend on i, so
# the same value is recomputed on every iteration.
race.date<-as.Date(meeting_date,"%d/%m/%Y")

# Training rows: everything dated up to and including 31/12/2007 + i days.
indicator1<-race.date<=as.Date("31/12/2007","%d/%m/%Y")+i
model.data<-horse.data[indicator1==TRUE,]

# Test rows: only those dated exactly 31/12/2007 + (i + 1) days.
indicator2<-race.date==as.Date("31/12/2007","%d/%m/%Y")+i+1
test.data<-horse.data[indicator2==TRUE,]

##############################
# Row filter for the training set, built as a product of 0/1 indicators:
# a row is kept only if every condition holds (non-empty gender, race_id
# not one of three excluded ids, days_since_ran <= 100, age in [0, 20]).
# NOTE(review): `=="NA"` compares against the literal string "NA", not a
# missing value; a real NA makes the comparison NA instead.
include.data<-ifelse(model.data$gender=="",0,1)*
    ifelse(model.data$race_id==187262,0,1)*
ifelse(model.data$race_id==184238,0,1)*
    ifelse(model.data$race_id==179250,0,1)*
ifelse(model.data$starting_price_decimal=="NA",0,1)*
    ifelse(model.data$days_since_ran<=100,1,0)*
ifelse(model.data$age>=0&model.data$age<=20,1,0)

# An NA in any indicator propagates through the product; treat such rows
# as excluded.
include.data<-ifelse(is.na(include.data),0,include.data)

#ifelse(model.data$speed_rating<=120,1,0)
    #ifelse(model.data$official_rating>=0,1,0)
###############################



# Same row filter, applied to the test set.
include.test.data<-ifelse(test.data$gender=="",0,1)*
    ifelse(test.data$race_id==187262,0,1)*
ifelse(test.data$race_id==184238,0,1)*
    ifelse(test.data$race_id==179250,0,1)*
ifelse(test.data$starting_price_decimal=="NA",0,1)*
    ifelse(test.data$days_since_ran<=100,1,0)*
ifelse(test.data$age>=0&test.data$age<=20,1,0)

include.test.data<-ifelse(is.na(include.test.data),0,include.test.data)

# NOTE(review): if no row matches the test date (or none passes the
# filter), test.data.reduced has zero rows. Passing zero-row newdata to
# predict() below looks like the likely trigger of "Argument eta must be
# a nonempty numeric vector" -- confirm by checking nrow(test.data.reduced).
test.data.reduced<-test.data[include.test.data==1,]

# Factor versions of the categorical predictors and of the response.
test.data.reduced$stall_number.f<-as.factor(test.data.reduced$stall_number)
test.data.reduced$age.f<-as.factor(test.data.reduced$age)
test.data.reduced$win.f<-as.factor(test.data.reduced$win)

#########################
model.data.reduced<-model.data[include.data==1,]

model.data.reduced$stall_number.f<-as.factor(model.data.reduced$stall_number)
model.data.reduced$age.f<-as.factor(model.data.reduced$age)
model.data.reduced$win.f<-as.factor(model.data.reduced$win)

# Binomial GLM of the win indicator on the filtered training rows.
model<-glm(win.f~days_since_ran+gender+official_rating+stall_number.f+age.f,data=model.data.reduced,family=binomial)
#########################

#Do predictions
# type="response" returns fitted probabilities rather than the linear
# predictor.
model.prob<-predict(model,newdata=test.data.reduced,type="response")

bm.prob<-test.data.reduced$bm_win_prob
# NOTE(review): this uses the bare name `bm_win_prob`, not `bm.prob`, and
# the result is neither assigned nor printed inside the loop -- presumably
# leftover debugging; verify.
max(bm_win_prob)

# Bet when the model probability exceeds c times bm.prob.
c<-1.3
bet<-ifelse(model.prob>c*bm.prob,1,0)
#############Only bet on one horse per race, highest win.prob out of them?
# NOTE(review): `win` and `starting_price_decimal` are bare names here, not
# columns of test.data.reduced, so their length may differ from `bet` --
# verify. `profit` is also overwritten on every iteration, so only the
# last iteration's value remains after the loop.
profit<-ifelse(win==1,starting_price_decimal-1,-1)
profit<-ifelse(bet==1,profit,0)
profit<-ifelse(is.na(profit),0,profit)


}

它给了我这个错误:

Error in family(object)$linkinv(pred) : 
  Argument eta must be a nonempty numeric vector

我不知道该从哪里着手修复它。我原以为这与我在每次循环后把利润存入向量有关,但我去掉那部分代码之后,错误仍然出现……

非常感谢任何帮助。

1 个答案:

答案 0 :(得分:1)

for循环的某个时刻,glm()未向变量model返回任何内容。

将此空model传递给predict()时,R就会产生此错误。此问题的根源可能是glm()无法用model.data.reduced拟合出模型。您应该检查传递给glm()的model.data.reduced的大小。

在for循环内部使用glm()创建模型并将结果传递给predict()时,我遇到了同样的问题。