我想将一些并行的R代码放入函数中,但是当在函数内部而不是函数外部时,该代码的运行速度要慢得多。
该代码采用一个坐标矩阵并将其转换为sf
对象。下面的演示代码将相同的过程运行两次。
示例代码:
# Packages
library(parallel)
library(sf)
library(pbapply)
# Sample Data
mat = matrix(runif(1e6,max = 1e5), ncol = 2)
seqs = 1:(nrow(mat)-1)
ncores = 6 # Change as needed
start1 = Sys.time()
# Code not running in a function #########
cl = parallel::makeCluster(ncores)
parallel::clusterExport(cl=cl, varlist=c("mat"), envir = environment())
geoms = pbapply::pblapply(seqs, function(y){sf::st_linestring(mat[c(y,y+1),c(1,2)])}, cl = cl)
parallel::stopCluster(cl)
end1 = Sys.time()
# Same code in a function ###########
func = function(mat, seqs, ncores){
cl = parallel::makeCluster(ncores)
parallel::clusterExport(cl=cl, varlist=c("mat"), envir = environment())
geoms = pbapply::pblapply(seqs, function(y){sf::st_linestring(mat[c(y,y+1),c(1,2)])}, cl = cl)
parallel::stopCluster(cl)
return(geoms)
}
start2 = Sys.time()
res = func(mat, seqs, ncores)
end2 = Sys.time()
# Compare Results
difftime(end1, start1, units = "s")
difftime(end2, start2, units = "s")
比较时间后,函数中代码的速度明显降低
difftime(end1, start1, units = "s")
Time difference of 41.98823 secs
difftime(end2, start2, units = "s")
Time difference of 114.983 secs