我最近有了一台多核计算机,正在学习使用并行计算。我对 `lapply` 相当熟悉,并且听说 `parLapply` 的用法与之非常相似。但我没能正确地使用它:似乎必须把所有内容(即要用到的函数、变量等)都明确地写在 `parLapply` 内部,它才能工作。`lapply` 会从父环境中读取对象,而 `parLapply` 似乎不会这样做。因此,在下面的示例中,我可以把所有信息都放进 `parLapply` 来让一切正常运行;但如果我要在用户自定义函数中使用它,就无法把 `text.var` 明确地写在 `parLapply` 里面。
library(parallel)
# Serial baseline: twenty copies of one sentence, processed one index at a time.
text.var <- rep("I like cake and ice cream so much!", 20)
ntv <- length(text.var)
gc.rate <- 10  # gc() is triggered on every gc.rate-th element

# Lower-case `i`, split it on single spaces, and join the character counts of
# the resulting pieces with " | ".
pos <- function(i) {
  pieces <- strsplit(tolower(i), " ")
  counts <- sapply(pieces, nchar)
  paste(counts, collapse = " | ")
}

# The anonymous function resolves pos, text.var and gc.rate from the
# enclosing (parent) environment.
lapply(seq_len(ntv), function(i) {
  x <- pos(text.var[i])
  if (i %% gc.rate == 0) {
    gc()
  }
  x
})
# Doesn't work: the cluster workers are separate R processes, so the anonymous
# function below cannot find pos, text.var or gc.rate, which were only defined
# in the calling session.
# The worker count is read from the "cl.cores" option, falling back to 4.
cl <- makeCluster(mc <- getOption("cl.cores", 4))
parLapply(cl, seq_len(ntv), function(i) {
x <- pos(text.var[i])
if (i%%gc.rate==0) gc()
return(x)
}
)
# Does work, but every object the worker needs has to be rebuilt inside the
# function handed to parLapply().
# The worker count is read from the "cl.cores" option, falling back to 4.
cl <- makeCluster(mc <- getOption("cl.cores", 4))
res <- tryCatch(
  parLapply(cl, seq_len(ntv), function(i) {
    ###### stuff I have to put inside parLapply ##########
    text.var <- rep("I like cake and ice cream so much!", 20)
    ntv <- length(text.var)
    gc.rate <- 10
    pos <- function(i) {
      paste(sapply(strsplit(tolower(i), " "), nchar), collapse = " | ")
    }
    ###### stuff I have to put inside parLapply ##########
    x <- pos(text.var[i])
    if (i %% gc.rate == 0) gc()
    x
  }),
  # Always shut the worker processes down, even if parLapply() fails;
  # otherwise they stay alive after this snippet has finished.
  finally = stopCluster(cl)
)
res
如何在不把 `text.var`、`ntv`、`gc.rate` 和 `pos` 明确写在 `parLapply` 内部的情况下,将它们传递给 `parLapply`?(我猜是以某种方式把它们作为列表传入)
PS:我用的是 Windows 7 机器,所以我认为需要使用 `parLapply`。
答案 0 :(得分:39)
您需要将这些变量导出到集群中的其他 R 进程:
# Start the workers; the count is read from the "cl.cores" option (default 4).
mc <- getOption("cl.cores", 4)
cl <- makeCluster(mc)
# Copy the named objects from the calling session to every worker process.
# clusterExport() looks the names up in the global environment by default;
# when calling it from inside a function, pass envir = environment().
clusterExport(
  cl = cl,
  varlist = c("text.var", "ntv", "gc.rate", "pos")
)
答案 1 :(得分:9)
Martin Morgan 提供的另一种方法在这里也适用。
此方法直接在 `parLapply` 调用中将对象提供给集群中的每个节点,而无需使用 `clusterExport`:
library(parallel)
# Inputs: twenty copies of one sentence.
text.var <- rep("I like cake and ice cream so much!", 20)
ntv <- length(text.var)
gc.rate <- 10  # gc() is triggered on every gc.rate-th element

# Lower-case `i`, split it on single spaces, and join the character counts of
# the resulting pieces with " | ".
pos <- function(i) {
  pieces <- strsplit(tolower(i), " ")
  counts <- sapply(pieces, nchar)
  paste(counts, collapse = " | ")
}

# Start the workers; the count is read from the "cl.cores" option (default 4).
mc <- getOption("cl.cores", 4)
cl <- makeCluster(mc)

# The objects listed after the function are handed to it as additional
# arguments, so the workers receive them with each call instead of having to
# look them up.
parLapply(
  cl,
  seq_len(ntv),
  function(i, pos, text.var, ntv, gc.rate) {
    x <- pos(text.var[i])
    if (i %% gc.rate == 0) {
      gc()
    }
    x
  },
  pos, text.var, ntv, gc.rate
)
答案 2 :(得分:-1)
# Console transcript: run the same worker function serially and on the
# cluster, check the results agree, then time both variants.
# NOTE(review): the parLapply() calls assume pos, text.var and gc.rate are
# already available on the workers of `cl`; the transcript does not show how
# they got there (e.g. an earlier clusterExport) -- confirm before relying on it.
out1<-lapply(seq_len(ntv), function(i) {x <- pos(text.var[i]);if (i%%gc.rate==0) gc();return(x)})
out2<-parLapply(cl, seq_len(ntv), function(i) {x <- pos(text.var[i]);if (i%%gc.rate==0) gc();return(x)})
> identical(out1,out2)
# [1] TRUE
# Time the serial and the parallel variant with rbenchmark; the printed
# table below reports 100 replications for each.
require(rbenchmark)
benchmark(lapply(seq_len(ntv), function(i) {x <- pos(text.var[i]);if (i%%gc.rate==0) gc();return(x)}),parLapply(cl, seq_len(ntv), function(i) {x <- pos(text.var[i]);if (i%%gc.rate==0) gc();return(x)}))
test
#1 lapply(seq_len(ntv), function(i) {\n x <- pos(text.var[i])\n if (i%%gc.rate == 0) \n gc()\n return(x)\n})
#2 parLapply(cl, seq_len(ntv), function(i) {\n x <- pos(text.var[i])\n if (i%%gc.rate == 0) \n gc()\n return(x)\n})
# replications elapsed relative user.self sys.self user.child sys.child
#1 100 20.03 3.453448 20.31 0.05 NA NA
#2 100 5.80 1.000000 0.22 0.03 NA NA
# Printing the cluster object: the timings above came from a 2-node socket
# cluster on localhost.
> cl
socket cluster with 2 nodes on host ‘localhost’