library(parallel)
# Calculate the number of worker cores: leave one core free for the main
# session, but never ask for fewer than one worker. detectCores() can return
# 1 (which would give 0 workers) or NA when the count cannot be determined;
# either value would make makeCluster() fail.
no_cores <- max(1L, detectCores() - 1L, na.rm = TRUE)
# Initiate cluster
# NOTE: nothing in this section shuts the workers down; call stopCluster(cl)
# once the cluster is no longer needed.
cl <- makeCluster(no_cores)

# NOTE(review): the "markovchain" and "markovchainList" classes are assumed to
# come from the markovchain package, which is not attached in this section --
# confirm that it is loaded before this code runs.
statesNames <- c("a", "b", "c")
# All three chains share the same 3x3 row-wise transition matrix, so build it
# once instead of repeating the literal for every chain.
transition_matrix <- matrix(
  c(0.2, 0.5, 0.3,
    0,   0.2, 0.8,
    0.1, 0.8, 0.1),
  nrow = 3, byrow = TRUE,
  dimnames = list(statesNames, statesNames)
)
mcA <- new("markovchain", states = statesNames,
           transitionMatrix = transition_matrix)
mcB <- new("markovchain", states = statesNames,
           transitionMatrix = transition_matrix)
mcC <- new("markovchain", states = statesNames,
           transitionMatrix = transition_matrix)
mclist <- new("markovchainList", markovchains = list(mcA, mcB, mcC))
# Alias used by the sequential benchmark calls.
mc <- mclist
# f() reads `mclist` as a free variable, so every worker needs its own copy.
clusterExport(cl, "mclist")
# Simulate one path through the chains stored in the global `mclist`.
#
# Starting from the first state of the first chain, draw one state per chain:
# at step i the next state is sampled from the states of chain i, using the
# row of that chain's transition matrix that corresponds to the current state.
#
# Args:
#   x: Ignored. Present only so the function can be used as the FUN argument
#      of apply-style helpers (e.g. parSapply(cl, 1:100, f)).
#
# Returns:
#   A character vector with one sampled state per chain in `mclist`, or
#   character(0) when `mclist` contains no chains.
f <- function(x) {
  chains <- mclist@markovchains
  n_chains <- length(chains)
  # Guard the empty case: chains[[1]] would fail, and 1:0 would iterate
  # over c(1, 0) instead of not iterating at all.
  if (n_chains == 0L) {
    return(character(0))
  }
  # Named `path` rather than `seq` to avoid shadowing base::seq().
  path <- character(length = n_chains)
  first_states <- chains[[1L]]@states
  current <- first_states[1L]
  for (i in seq_len(n_chains)) {
    chain <- chains[[i]]
    state_names <- chain@states
    # Row of the transition matrix for the current state gives the
    # probabilities of moving to each state of this chain.
    current <- sample(
      x = state_names, size = 1,
      prob = chain@transitionMatrix[which(state_names == current), ]
    )
    path[i] <- current
  }
  path
}
我有两个执行相同任务的函数:一个使用 parallel 包,另一个不使用。我原以为使用 parallel 包后执行会更快,但实际上它反而更慢。
> microbenchmark(rmarkovchain(100, mc, "matrix",useRCpp = F), parSapply(cl, 1:100,f))
Unit: milliseconds
expr min lq mean median uq max neval
rmarkovchain(100, mc, "matrix", useRCpp = F) 3.632955 4.251373 5.611569 5.507326 6.681284 11.92689 100
parSapply(cl, 1:100, f) 40.929350 43.893277 45.516566 45.373365 47.366842 52.80290 100
由于我使用的是 Linux,我改用 mclapply 代替 parSapply;现在它比 parSapply 快,但仍然比顺序执行慢。
> microbenchmark(rmarkovchain(100, mc, "matrix",useRCpp = F), mclapply(cl, 1:100,f))
Unit: milliseconds
expr min lq mean median uq max neval
rmarkovchain(100, mc, "matrix", useRCpp = F) 3.798599 3.97889 6.636692 6.053313 8.935721 18.08281 100
mclapply(cl, 1:100, f) 14.862175 20.81366 26.211019 25.636895 31.893560 34.42886 100
为什么在 R 中使用 parallel 包无法加快运行速度?
答案 0 :(得分:0)
我完全同意Andrew Taylor。
我读过这篇帖子,其中提到:当某个任务需要很长时间(几秒钟)时,使用 parallel 包会很有用。
为了验证这一点,我增加了输出的数量,即从 100 增加到 10000。
> microbenchmark(rmarkovchain(10000, mc, "matrix",useRCpp = F), mclapply(cl, 1:10000,f))
Unit: milliseconds
expr min lq mean median uq max neval
rmarkovchain(10000, mc, "matrix", useRCpp = F) 1140.07516 1186.11030 1294.44378 1224.73236 1401.15991 1824.34178 100
mclapply(cl, 1:10000, f) 18.27705 22.48832 28.23278 30.33396 33.06192 42.69159 100
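现在,并行处理看起来比顺序执行更快。
(注意:以上 mclapply(cl, 1:10000, f) 的参数顺序有误。mclapply 的签名是 mclapply(X, FUN, ...),它通过 fork 创建子进程,不需要集群对象 cl;上面的写法把 cl 当作 X、把 1:10000 当作 FUN,正确的调用应为 mclapply(1:10000, f)。因此这些计时很可能并未真正反映执行 f 的耗时,结论需要用正确的调用重新验证。)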