代替循环在R中并行运行

时间:2014-09-04 17:27:00

标签: r for-loop parallel-processing

我有这个for循环:

library(EnvStats)
mvfyfueu <- matrix(, nrow = 0, ncol = 3)
for (i in 1:2000 ) {

  # fy (S355, yield strength N/mm2)
  meanmean = 419.38 #(*)
  sdmean = 10 #(**)
  meanmeanlv = 400 #(**)
  meanmeanuv = 440 #(**)
  meanfy <- dist(meanmean,sdmean,meanmeanlv,meanmeanuv,"norm")

  meansd = 20.25 #(*)
  sdsd = 5 #(**)
  meansdlv = 15 #(**)
  meansduv = 25 #(**)
  sdfy <- dist(meansd,sdsd,meansdlv,meansduv,"norm")

  fylv = 355 #(*)
  fyuv = 500 #(**)  
  lsupfy <- 1 - plnormTruncAlt(fyuv, mean = meanfy[1], cv = sdfy[1]/meanfy[1]) - 1e-10
  linffy <- plnormTruncAlt(fylv, mean = meanfy[1], cv = sdfy[1]/meanfy[1]) - 1e-10

  # fu (S355, tensile strength N/mm2)
  meanmean = 533.44 #(*)
  sdmean = 10 #(**)
  meanmeanlv = 500 #(**)
  meanmeanuv = 550 #(**)
  meanfu <- dist(meanmean,sdmean,meanmeanlv,meanmeanuv,"norm")

  meansd = 16.53 #(*)
  sdsd = 5 #(**)
  meansdlv = 10 #(**)
  meansduv = 25 #(**)
  sdfu <- dist(meansd,sdsd,meansdlv,meansduv,"norm")

  fulv = 470 #(*)
  fuuv = 630 #(*)  
  lsupfu <- 1 - plnormTruncAlt(fuuv, mean = meanfu[1], cv = sdfu[1]/meanfu[1]) - 1e-10
  linffu <- plnormTruncAlt(fulv, mean = meanfu[1], cv = sdfu[1]/meanfu[1]) - 1e-10

  # eu (S355, strain at maximum strength mm/mm)
  meanmean = 0.2645 #(*)
  sdmean = 10 #(**)
  meanmeanlv = 0.2 #(**)
  meanmeanuv = 0.3 #(**)
  meaneu <- dist(meanmean,sdmean,meanmeanlv,meanmeanuv,"norm")

  meansd = 0.0613 #(*)
  sdsd = 0.02 #(**)
  meansdlv = 0.02 #(**)
  meansduv = 0.1 #(**)
  sdeu <- dist(meansd,sdsd,meansdlv,meansduv,"norm")

  eulv = 0.1 #(*)
  euuv = 0.3 #(*)  
  lsupeu <- 1 - plnormTruncAlt(euuv, mean = meaneu[1], cv = sdeu[1]/meaneu[1]) - 1e-10
  linfeu <- plnormTruncAlt(eulv, mean = meaneu[1], cv = sdeu[1]/meaneu[1]) - 1e-10

  #Generate samples
  mat.fyfueu <- simulateMvMatrix(2000,
                              distributions = c(fy = "lnormAlt",fu = "lnormAlt",eu = "lnormAlt"),
                              param.list = list(fy = list(mean=meanfy[1], cv=sdfy[1]/meanfy[1]),
                                                fu = list(mean=meanfu[1], cv=sdfu[1]/meanfu[1]),
                                                eu = list(mean=meaneu[1], cv=sdeu[1]/meaneu[1])),
                              left.tail.cutoff = c(fy = ifelse(linffy <= 1e-5, 0, linffy),
                                                      fu = ifelse(linffu <= 1e-5, 0, linffu),
                                                      eu = ifelse(linfeu <= 1e-5, 0, linfeu)),
                              right.tail.cutoff = c(fy = ifelse(lsupfy <= 0, .Machine$double.eps, lsupfy),
                                                       fu = ifelse(lsupfu <= 0, .Machine$double.eps, lsupfu),
                                                       eu = ifelse(lsupeu <= 0, .Machine$double.eps, lsupeu)),
                              cor.mat = matrix(c(1, .75, -0.45, .75, 1, -0.6, -0.45, -0.6, 1), 3, 3),
                              sample.method = "LHS", max.iter = 100) #, seed = i

  mvfyfueu <- rbind2(mvfyfueu, mat.fyfueu)
}

dist 功能:

dist <- function(meanv, sdv, lv, uv, dist) {
  library(EnvStats)
  lsup <- 1 - pnorm(uv, mean = meanv, sd = sdv)
  linf <- pnorm(lv, mean = meanv, sd = sdv)
  value <- simulateVector(2, distribution = dist,
                       param.list = list(mean = meanv, sd = sdv), #seed = i,
                       sort = FALSE, left.tail.cutoff = ifelse(linf == 0, .Machine$double.eps, linf),
                       right.tail.cutoff = ifelse(lsup == 0, .Machine$double.eps, lsup), sample.method = "LHS")
  return(value)
}

现在我想并行运行它,所以我把它改为:

library(doParallel)
library(foreach)
#setup parallel backend to use 7 processors
cl<-makeCluster(7)
registerDoParallel(cl)
library(EnvStats)
mvfyfueu <- matrix(, nrow = 0, ncol = 3)
iters <- 100
foreach(icount(iters)) %dopar% {

  # fy (S355, yield strength N/mm2)
  meanmean = 419.38 #(*)
  sdmean = 10 #(**)
  meanmeanlv = 400 #(**)
  meanmeanuv = 440 #(**)
  meanfy <- dist(meanmean,sdmean,meanmeanlv,meanmeanuv,"norm")

  meansd = 20.25 #(*)
  sdsd = 5 #(**)
  meansdlv = 15 #(**)
  meansduv = 25 #(**)
  sdfy <- dist(meansd,sdsd,meansdlv,meansduv,"norm")

  fylv = 355 #(*)
  fyuv = 500 #(**)  
  lsupfy <- 1 - plnormTruncAlt(fyuv, mean = meanfy[1], cv = sdfy[1]/meanfy[1]) - 1e-10
  linffy <- plnormTruncAlt(fylv, mean = meanfy[1], cv = sdfy[1]/meanfy[1]) - 1e-10

  # fu (S355, tensile strength N/mm2)
  meanmean = 533.44 #(*)
  sdmean = 10 #(**)
  meanmeanlv = 500 #(**)
  meanmeanuv = 550 #(**)
  meanfu <- dist(meanmean,sdmean,meanmeanlv,meanmeanuv,"norm")

  meansd = 16.53 #(*)
  sdsd = 5 #(**)
  meansdlv = 10 #(**)
  meansduv = 25 #(**)
  sdfu <- dist(meansd,sdsd,meansdlv,meansduv,"norm")

  fulv = 470 #(*)
  fuuv = 630 #(*)  
  lsupfu <- 1 - plnormTruncAlt(fuuv, mean = meanfu[1], cv = sdfu[1]/meanfu[1]) - 1e-10
  linffu <- plnormTruncAlt(fulv, mean = meanfu[1], cv = sdfu[1]/meanfu[1]) - 1e-10

  # eu (S355, strain at maximum strength mm/mm)
  meanmean = 0.2645 #(*)
  sdmean = 10 #(**)
  meanmeanlv = 0.2 #(**)
  meanmeanuv = 0.3 #(**)
  meaneu <- dist(meanmean,sdmean,meanmeanlv,meanmeanuv,"norm")

  meansd = 0.0613 #(*)
  sdsd = 0.02 #(**)
  meansdlv = 0.02 #(**)
  meansduv = 0.1 #(**)
  sdeu <- dist(meansd,sdsd,meansdlv,meansduv,"norm")

  eulv = 0.1 #(*)
  euuv = 0.3 #(*)  
  lsupeu <- 1 - plnormTruncAlt(euuv, mean = meaneu[1], cv = sdeu[1]/meaneu[1]) - 1e-10
  linfeu <- plnormTruncAlt(eulv, mean = meaneu[1], cv = sdeu[1]/meaneu[1]) - 1e-10

  #Generate samples
  mat.fyfueu <- simulateMvMatrix(2000,
                                 distributions = c(fy = "lnormAlt",fu = "lnormAlt",eu = "lnormAlt"),
                                 param.list = list(fy = list(mean=meanfy[1], cv=sdfy[1]/meanfy[1]),
                                                   fu = list(mean=meanfu[1], cv=sdfu[1]/meanfu[1]),
                                                   eu = list(mean=meaneu[1], cv=sdeu[1]/meaneu[1])),
                                 left.tail.cutoff = c(fy = ifelse(linffy <= 1e-5, 0, linffy),
                                                      fu = ifelse(linffu <= 1e-5, 0, linffu),
                                                      eu = ifelse(linfeu <= 1e-5, 0, linfeu)),
                                 right.tail.cutoff = c(fy = ifelse(lsupfy <= 0, .Machine$double.eps, lsupfy),
                                                       fu = ifelse(lsupfu <= 0, .Machine$double.eps, lsupfu),
                                                       eu = ifelse(lsupeu <= 0, .Machine$double.eps, lsupeu)),
                                 cor.mat = matrix(c(1, .75, -0.45, .75, 1, -0.6, -0.45, -0.6, 1), 3, 3),
                                 sample.method = "LHS", max.iter = 100) #, seed = i

  mvfyfueu <- rbind2(mvfyfueu, mat.fyfueu)
}

但是在并行运行结束时,我得到一个空的mvfyfueu矩阵:

> mvfyfueu
     [,1] [,2] [,3]

这与串行运行的结果完全不同。我应该纠正什么?感谢

1 个答案:

答案 0 :(得分:2)

由于我没有适当的对象来测试您的功能,或者没有时间创建可比较的脚本版本,因此我只想使用一些玩具数据和一个简单的过程来演示 -

library(iterators)
library(foreach)
## your parallel backend setup may 
## be different, but that shouldn't
## affect anything
library(doSNOW)
library(parallel)
##
mvfyfueu <- matrix( , nrow = 0, ncol = 3)
iters <- 100
v1 <- v2 <- v3 <- 1:100
##
cl <- parallel::makeCluster("SOCK",3)
registerDoSNOW(cl)
##
mvfyfueu <- foreach(
  icount(iters),
  .combine=rbind) %dopar% {

    mat_row <- matrix(
      c(sample(v1),
        sample(v2),
        sample(v3)),
      nrow=1,
      ncol=3,
      byrow=TRUE)
    mat_row
  }
##
stopCluster(cl)
##
> dim(mvfyfueu)
[1] 100   3
> head(mvfyfueu)
     [,1] [,2] [,3]
[1,]   80   95   77
[2,]   75   24   57
[3,]   33   89   67
[4,]   29   91   75
[5,]   18   75   20
[6,]   54   44   25

当您使用foreach时,您绝对应该利用.combine参数来确定数据的组合方式(根据我的经验,这通常是rbind)。执行此操作时,您不需要在rbind正文中使用全局对象显式foreach每次迭代中的对象,.combine参数会处理此问题。正如我在评论中指出的那样,我认为有必要将foreach调用分配给一个对象。如果这有帮助,请告诉我,如果没有,请随时发布一些示例数据以测试您的代码。