我正在编写一个执行向后逐步特征选择的函数。内部循环使用了 foreach,希望借助并行节省一些时间。但是,进程查看器显示它只在第一次迭代中使用了多个线程。如果用 %do% 代替 %dopar%,代码可以在单线程上正常运行。
# Placeholders from the original snippet: `thedata` (the data set) and
# `fit.ols` (the model fitted on it) must already exist before this runs.
thedata # my data
fit.ols # model on thedata

# Register a two-worker cluster as the backend used by %dopar%.
cl <- makeCluster(2)
registerDoParallel(cl)

# Backward stepwise elimination. Each pass refits the current model once per
# candidate variable (with that variable's terms removed) and keeps the refit
# with the highest R2. `nv`, `nk`, `res`, `dropTerms` and `cutoff` are expected
# to be defined before this point.
# seq_len(max(0, ...)) keeps the loop from running when the step count is zero
# or negative; `1:n` would iterate over c(1, 0) in that case.
for (i in seq_len(max(0, nv - max(0, nk - 1)))) {
  # Snapshot of the model as it was before this pass drops anything.
  fit2.ols <- fit.ols
  pname <- names(fit.ols$Design$unit)
  pname2 <- fit.ols$Design$name
  pterm <- attr(fit.ols$terms, "term.labels")

  # One candidate refit per name in `pname`; foreach collects the value of
  # the body for each `j` into a list.
  drop1Model <- foreach(
    j = pname,
    .packages = c("rms", "stats"),
    .export = c("thedata", "pname2", "pterm")
  ) %dopar% {
    # Split every entry of pname2 on " * " and select the entries in which
    # `j` is one of the pieces.
    remove.index <- which(vapply(
      strsplit(pname2, " \\* "),
      function(parts) j %in% parts,
      logical(1)
    ))
    remove.term <- pterm[remove.index]
    # Refit with the selected term labels subtracted from the formula.
    model <- update(
      fit.ols,
      as.formula(paste0(".~ . - ", paste(remove.term, collapse = "-")))
    )
    list(
      drop.var = j,
      remove.index = remove.index,
      remove.term = remove.term,
      model = model
    )
  }

  # R2 of every candidate; vapply guarantees a plain numeric vector.
  rsq <- vapply(
    drop1Model,
    function(candidate) candidate$model$stats[["R2"]],
    numeric(1)
  )
  # First candidate with the largest R2.
  drop.index <- which.max(rsq)
  best <- drop1Model[[drop.index]]

  fit.ols <- best$model
  dropTerms[[i]] <- best$remove.term
  res[i + 1, 1] <- best$drop.var
  res[i + 1, 2:3] <- c(i, fit.ols$stats[["R2"]])

  # NOTE(review): there is no `break` here, so if R2 stays at or below
  # `cutoff` on later passes, `minimodel` is overwritten each time and ends
  # up as the model from before the final drop. Confirm whether the model at
  # the first crossing was intended.
  if (fit.ols$stats[["R2"]] <= cutoff) {
    minimodel <- fit2.ols
  }
}

# Shut the workers down so the cluster processes are not leaked.
stopCluster(cl)
注意:我在 Ubuntu 14.04 上使用 R 3.2.5 以及最新版的 rms、foreach 和 doParallel 包。