我正在尝试使用foreach来利用并行处理来完成一个完整的子集回归问题。我正在尝试使用gamm4包来拟合完整的模型列表,使用二项式函数,其中响应以比例提供,权重参数提供试验次数。使用%do%运行时代码运行正常但在%dopar%下运行失败(仅为AIC和BIC返回NA)。奇怪的是,如果省略gamm4调用的权重参数,代码确实可以使用%dopar%罚款,但显然这不是一个可行的解决方案。我一直在使用类似的代码,没有基于高斯分布和二项分布的问题,其中响应输入为1,0s(因此不需要调用权重),完全没有问题。我使用的是Windows 7 64位,R版本3.1.2。我已经更新了所有相关的包。一个可重复的(但玩具)例子:
set.seed(666)
# generate a random factor with a random offset effect
random.factor=factor(sort(rep(1:10,10)))
random.effect=sort(rep(rnorm(10),10))
# generate some random predictor variables
X1 = rnorm(100)
X2 = rnorm(100)
X3 = rnorm(100)
X4 = rep(0,100) # make it so one variable fails (just to check the "try" if statement)
#X4 = rnorm(100)
X5 = rnorm(100)
# calculate a response variable based on some of the predictors
z = 1 + 2*X1 + 3*X2 + 2*X3^2 # linear combination with a bias
pr = 1/(1+exp(-(z+random.effect))) # pass through an inv-logit function
y = rbinom(n=100,size=100,pr)/100 # bernoulli response variable.
# Note that the response variable is a proprotion of successes of 100 trials
# We want to feed the number of trials as a "weights" argument to gamm
# now make a data frame of predictors
pred.dat=data.frame(X1=X1,X2=X2,X3=X3,X4=X4,X5=X5)
pred.vars=colnames(pred.dat)
# make a dataframe for passing to gamm
use.dat = data.frame(random.factor=random.factor,y=y,pred.dat)
# now set up the models to run
# this includes all combinations of variables, but only up to a total of two in
# any one model
model.fits.test=c(combn(1:ncol(pred.dat), 1,simplify = F),
combn(1:ncol(pred.dat), 2,simplify = F))
models.use=list(1,2,3,4,5)
n.models=length(model.fits.test)
require(lme4)
require(doParallel)
registerDoParallel(cores=4)
# if I run this using do, it works fine (with error values from the try argument
# returned for models that fail)
out.dat<-foreach(l = 1:n.models,.combine=rbind,
.packages=c("lme4","gamm4"))%do%{
vars.vec=model.fits.test[[l]]
formula.l<-as.formula(paste("y~",
paste(colnames(pred.dat)[vars.vec],collapse="+"),"+(1|random.factor)",sep=""))
model.fit=try(glmer(formula.l,
data=use.dat,
family="binomial",
weights=rep(100,nrow(use.dat))))
success<-class(model.fit)[[1]]!="try-error"
out.vec<-c(rep(NA,2),rep(NA,ncol(pred.dat)))
names(out.vec)<- c("AIC","BIC",colnames(pred.dat))
out.vec[
which(match(names(out.vec),pred.vars[vars.vec])>0)]<-1
if(success){
out.vec["AIC"]<-AIC(model.fit)
out.vec["BIC"]<-BIC(model.fit)
}
return(out.vec)
}
out.dat
# if I run using dopar, nothing is returned.
out.dat<-foreach(l = 1:n.models,.combine=rbind,
.packages=c("lme4","gamm4"))%dopar%{
vars.vec=model.fits.test[[l]]
formula.l<-as.formula(paste("y~",
paste(colnames(pred.dat)[vars.vec],collapse="+"),"+(1|random.factor)",sep=""))
model.fit=try(glmer(formula.l,
data=use.dat,
family="binomial",
weights=rep(100,nrow(use.dat))))
success<-class(model.fit)[[1]]!="try-error"
out.vec<-c(rep(NA,2),rep(NA,ncol(pred.dat)))
names(out.vec)<- c("AIC","BIC",colnames(pred.dat))
out.vec[
which(match(names(out.vec),pred.vars[vars.vec])>0)]<-1
if(success){
out.vec["AIC"]<-AIC(model.fit)
out.vec["BIC"]<-BIC(model.fit)
}
return(out.vec)
}
out.dat
# Now run dopar without the weights argument (not really appropriate,
# but for the sake of demonstration). I get results again, but it doesn't
# really make sense to do this. Also, my real example fails unless I can supply
# weights.
out.dat<-foreach(l = 1:n.models,.combine=rbind,
.packages=c("lme4","gamm4"))%dopar%{
vars.vec=model.fits.test[[l]]
formula.l<-as.formula(paste("y~1+",
paste("s(",colnames(pred.dat)[vars.vec],")",collapse="+"),sep=""))
model.fit=try(gamm4(formula.l, random=~(1|random.factor),
data=use.dat,family="binomial"))
success<-class(model.fit)[[1]]!="try-error"
out.vec<-c(rep(NA,2),rep(NA,ncol(pred.dat)))
names(out.vec)<- c("AIC","BIC",colnames(pred.dat))
out.vec[
which(match(names(out.vec),pred.vars[vars.vec])>0)]<-1
if(success){
out.vec["AIC"]<-AIC(model.fit$mer)
out.vec["BIC"]<-BIC(model.fit$mer)
}
return(out.vec)
}
out.dat