使用并行的parLapply:无法访问并行代码中的变量

时间:2012-08-18 15:08:02

标签: r parallel-processing

我最近有了一台多核计算机,正在学习使用并行计算。我对lapply相当熟练,并被告知parLapply的用法与之非常相似。不过我似乎没有用对。看起来我必须把所有内容(即要用到的函数、变量等)都明确地放在parLapply内部才能使其工作。lapply会从父环境中读取这些对象,而parLapply似乎不会这样做。因此,在下面的示例中,我可以通过把所有信息都放进parLapply来让一切正常工作;但如果我是在用户自定义函数中使用parLapply,就无法把text.var明确地放在parLapply内部。

library(parallel)
# Sample input: the same sentence repeated 20 times.
text.var <- rep("I like cake and ice cream so much!", 20)
# Number of elements to process.
ntv <- length(text.var)
# The examples below call gc() on every index that is a multiple of this value.
gc.rate <- 10

# Summarise the word lengths of the text in `i`.
#
# `i` is a character vector. It is lower-cased and split on single spaces, and
# the character count of every resulting word is joined with " | ".
# Returns a single string, e.g. pos("I like cake") gives "1 | 4 | 4".
#
# The splits are flattened with unlist() before counting. sapply() only
# simplified cleanly when every element had the same number of words; for
# ragged input or an empty string it returned a list, and paste() then
# emitted deparsed text such as "integer(0)".
pos <- function(i) {
    words <- unlist(strsplit(tolower(i), " "))
    paste(nchar(words), collapse = " | ")
}

# Serial baseline: run pos() on each element of text.var in turn, calling
# gc() whenever the index is a multiple of gc.rate.
lapply(seq_len(ntv), function(idx) {
    lens <- pos(text.var[idx])
    if (idx %% gc.rate == 0) {
        gc()
    }
    lens
})

# Doesn't work: the anonymous function refers to pos, text.var and gc.rate,
# which are defined only in this session and are never sent to the cluster's
# R processes.
cl <- makeCluster(mc <- getOption("cl.cores", 4))
parLapply(cl, seq_len(ntv), function(i) {
        x <- pos(text.var[i])
        if (i%%gc.rate==0) gc()
        return(x)
    }

)

# Works, but only because everything the worker needs is redefined inside the
# function passed to parLapply.
# NOTE(review): this overwrites `cl` from the previous attempt and no
# stopCluster() call is visible for either cluster -- confirm the workers are
# shut down elsewhere.
cl <- makeCluster(mc <- getOption("cl.cores", 4))
parLapply(cl, seq_len(ntv), function(i) {
        # --- definitions repeated inside the worker function ---
        text.var <- rep("I like cake and ice cream so much!", 20)
        ntv <- length(text.var)
        gc.rate <- 10
        pos <-  function(i) {
            paste(sapply(strsplit(tolower(i), " "), nchar), collapse=" | ")
        }
        # --- end of repeated definitions ---
        x <- pos(text.var[i])
        if (i%%gc.rate==0) gc()
        return(x)
    }
)

如何才能把text.var、ntv、gc.rate和pos传递给parLapply,而不必将它们明确地写在parLapply内部? (我猜可以通过某种方式把它们作为列表传递)

PS:我用的是Windows 7机器,所以我认为需要使用parLapply。

3 个答案:

答案 0 :(得分:39)

您需要将这些变量导出到群集中的其他R进程:

# Start the workers, then copy the objects the worker function relies on
# into each worker process.
mc <- getOption("cl.cores", 4)
cl <- makeCluster(mc)
clusterExport(cl, c("text.var", "ntv", "gc.rate", "pos"))

答案 1 :(得分:9)

Martin Morgan提供的另一种方法也适用于此。

此方法直接在parLapply调用中将对象提供给集群中的每个节点,而无需使用集群导出:

library(parallel)
# Sample input: the same sentence repeated 20 times.
text.var <- rep("I like cake and ice cream so much!", 20)
# Number of elements to process.
ntv <- length(text.var)
# The worker function below calls gc() on every index that is a multiple of
# this value.
gc.rate <- 10

# Summarise the word lengths of the text in `i`.
#
# `i` is a character vector. It is lower-cased and split on single spaces, and
# the character count of every resulting word is joined with " | ".
# Returns a single string, e.g. pos("I like cake") gives "1 | 4 | 4".
#
# The splits are flattened with unlist() before counting. sapply() only
# simplified cleanly when every element had the same number of words; for
# ragged input or an empty string it returned a list, and paste() then
# emitted deparsed text such as "integer(0)".
pos <- function(i) {
    words <- unlist(strsplit(tolower(i), " "))
    paste(nchar(words), collapse = " | ")
}

# Start the workers, then run the job with pos, text.var, ntv and gc.rate
# handed to the worker function as extra arguments of parLapply, so no
# clusterExport() call is needed.
mc <- getOption("cl.cores", 4)
cl <- makeCluster(mc)
# tryCatch(finally = ) shuts the workers down whether or not the job succeeds;
# without a stopCluster() call the worker processes would be left running.
tryCatch(
    parLapply(cl, seq_len(ntv), function(i, pos, text.var, ntv, gc.rate) {
        x <- pos(text.var[i])
        if (i %% gc.rate == 0) gc()
        x
    }, pos, text.var, ntv, gc.rate),
    finally = stopCluster(cl)
)

答案 2 :(得分:-1)

# Run the same per-index job serially (out1) and on the cluster (out2) so the
# two results can be compared.
out1 <- lapply(seq_len(ntv), function(i) {
    x <- pos(text.var[i])
    if (i %% gc.rate == 0) gc()
    x
})
out2 <- parLapply(cl, seq_len(ntv), function(i) {
    x <- pos(text.var[i])
    if (i %% gc.rate == 0) gc()
    x
})

>     identical(out1,out2)
# [1] TRUE
# library() stops with an error if rbenchmark is not installed; require() would
# only return FALSE and let the failure surface later at benchmark().
library(rbenchmark)
# Time the serial and the parallel version of the same job. The two expressions
# are left exactly as written because benchmark() labels its rows with them.
benchmark(
    lapply(seq_len(ntv), function(i) {x <- pos(text.var[i]);if (i%%gc.rate==0) gc();return(x)}),
    parLapply(cl, seq_len(ntv), function(i) {x <- pos(text.var[i]);if (i%%gc.rate==0) gc();return(x)})
)


                                                                                       test
#1        lapply(seq_len(ntv), function(i) {\n    x <- pos(text.var[i])\n    if (i%%gc.rate == 0) \n        gc()\n    return(x)\n})
#2 parLapply(cl, seq_len(ntv), function(i) {\n    x <- pos(text.var[i])\n    if (i%%gc.rate == 0) \n        gc()\n    return(x)\n})
#  replications elapsed relative user.self sys.self user.child sys.child
#1          100   20.03 3.453448     20.31     0.05         NA        NA
#2          100    5.80 1.000000      0.22     0.03         NA        NA

> cl
socket cluster with 2 nodes on host ‘localhost’