麻烦使用来自doParallel的foreach和gamm4

时间:2015-01-21 08:39:28

标签: r foreach parallel-processing

我正在尝试使用foreach来利用并行处理来完成一个完整的子集回归问题。我正在尝试使用gamm4包来拟合完整的模型列表,使用二项式函数,其中响应以比例提供,权重参数提供试验次数。使用%do%运行时代码运行正常但在%dopar%下运行失败(仅为AIC和BIC返回NA)。奇怪的是,如果省略gamm4调用的权重参数,代码确实可以使用%dopar%罚款,但显然这不是一个可行的解决方案。我一直在使用类似的代码,没有基于高斯分布和二项分布的问题,其中响应输入为1,0s(因此不需要调用权重),完全没有问题。我使用的是Windows 7 64位,R版本3.1.2。我已经更新了所有相关的包。一个可重复的(但玩具)例子:

set.seed(666)

# generate a random factor with a random offset effect
random.factor=factor(sort(rep(1:10,10)))
random.effect=sort(rep(rnorm(10),10))

# generate some random predictor variables
X1 = rnorm(100)
X2 = rnorm(100)
X3 = rnorm(100)
X4 = rep(0,100)  # make it so one variable fails (just to check the "try" if statement)
#X4 = rnorm(100)
X5 = rnorm(100)

# calculate a response variable based on some of the predictors
z = 1 + 2*X1 + 3*X2 + 2*X3^2        # linear combination with a bias
pr = 1/(1+exp(-(z+random.effect)))         # pass through an inv-logit function
y = rbinom(n=100,size=100,pr)/100      # bernoulli response variable.
 # Note that the response variable is a proprotion of successes of 100 trials
 # We want to feed the number of trials as a "weights" argument to gamm

# now make a data frame of predictors
pred.dat=data.frame(X1=X1,X2=X2,X3=X3,X4=X4,X5=X5)
pred.vars=colnames(pred.dat)

# make a dataframe for passing to gamm
use.dat = data.frame(random.factor=random.factor,y=y,pred.dat)

# now set up the models to run
# this includes all combinations of variables, but only up to a total of two in
# any one model
model.fits.test=c(combn(1:ncol(pred.dat), 1,simplify = F),
             combn(1:ncol(pred.dat), 2,simplify = F))


models.use=list(1,2,3,4,5)
n.models=length(model.fits.test)

require(lme4)
require(doParallel)

registerDoParallel(cores=4)

# if I run this using do, it works fine (with error values from the try argument
# returned for models that fail)
out.dat<-foreach(l = 1:n.models,.combine=rbind,
                 .packages=c("lme4","gamm4"))%do%{
  vars.vec=model.fits.test[[l]]
  formula.l<-as.formula(paste("y~",
         paste(colnames(pred.dat)[vars.vec],collapse="+"),"+(1|random.factor)",sep=""))

  model.fit=try(glmer(formula.l,
                       data=use.dat,
                       family="binomial",
                       weights=rep(100,nrow(use.dat))))

  success<-class(model.fit)[[1]]!="try-error"

  out.vec<-c(rep(NA,2),rep(NA,ncol(pred.dat)))
  names(out.vec)<- c("AIC","BIC",colnames(pred.dat))

        out.vec[
             which(match(names(out.vec),pred.vars[vars.vec])>0)]<-1

  if(success){
        out.vec["AIC"]<-AIC(model.fit)
        out.vec["BIC"]<-BIC(model.fit)
        }
  return(out.vec)
  }

out.dat

# if I run using dopar, nothing is returned.
out.dat<-foreach(l = 1:n.models,.combine=rbind,
                 .packages=c("lme4","gamm4"))%dopar%{
  vars.vec=model.fits.test[[l]]
  formula.l<-as.formula(paste("y~",
         paste(colnames(pred.dat)[vars.vec],collapse="+"),"+(1|random.factor)",sep=""))

  model.fit=try(glmer(formula.l,
                       data=use.dat,
                       family="binomial",
                       weights=rep(100,nrow(use.dat))))

  success<-class(model.fit)[[1]]!="try-error"

  out.vec<-c(rep(NA,2),rep(NA,ncol(pred.dat)))
  names(out.vec)<- c("AIC","BIC",colnames(pred.dat))

        out.vec[
             which(match(names(out.vec),pred.vars[vars.vec])>0)]<-1

  if(success){
        out.vec["AIC"]<-AIC(model.fit)
        out.vec["BIC"]<-BIC(model.fit)
        }
  return(out.vec)
  }

out.dat



# Now run dopar without the weights argument (not really appropriate,
# but for the sake of demonstration). I get results again, but it doesn't
# really make sense to do this. Also, my real example fails unless I can supply
# weights.
out.dat<-foreach(l = 1:n.models,.combine=rbind,
                 .packages=c("lme4","gamm4"))%dopar%{
  vars.vec=model.fits.test[[l]]
  formula.l<-as.formula(paste("y~1+",
         paste("s(",colnames(pred.dat)[vars.vec],")",collapse="+"),sep=""))
  model.fit=try(gamm4(formula.l, random=~(1|random.factor),
                       data=use.dat,family="binomial"))
  success<-class(model.fit)[[1]]!="try-error"

  out.vec<-c(rep(NA,2),rep(NA,ncol(pred.dat)))
  names(out.vec)<- c("AIC","BIC",colnames(pred.dat))

        out.vec[
             which(match(names(out.vec),pred.vars[vars.vec])>0)]<-1

  if(success){
        out.vec["AIC"]<-AIC(model.fit$mer)
        out.vec["BIC"]<-BIC(model.fit$mer)
        }
  return(out.vec)
  }

out.dat

0 个答案:

没有答案