我是R的新手,我正在尝试使用foreach循环和doSNOW包来并行化嵌套for循环 但是foreach循环比for循环需要更多的时间
她是我的代码:
初始化变量
m=1000
n=5000
k=3
x=matrix(c(rnorm(m*n/2,0,1),rnorm(m*n/2,2,1)),n,m,byrow=TRUE)
d1 <- matrix(,nrow=nrow(x), ncol=k)
d2 <- matrix(,nrow=nrow(x), ncol=k)
centres=x[sample(1:nrow(x),k,replace=FALSE),]
for循环
start.time1 <- Sys.time()
for(i in 1:n){
for(j in 1:k){
d1[i,j] <- sqrt(sum((x[i,]-centres[j,])^2))
}
}
end.time1 <- Sys.time()
time.taken1 <- end.time1 - start.time1
并行foreach循环
start.time2 <- Sys.time()
library(foreach)
library(doSNOW)
cl <- makeCluster(4, type="SOCK")
registerDoSNOW(cl)
for(j in 1:k){
d2[,j] <- foreach(i = 1:n, .combine='cbind') %dopar% sqrt(sum((x[i,]-centres[j,])^2))
}
end.time2 <- Sys.time()
time.taken2 <- end.time2 - start.time2
print("execution time without parall")
print(time.taken1)
print("execution time with parall")
print(time.taken2)
结果
# source('~/.active-rstudio-document')
# [1] "execution time without parall"
# Time difference of 0.335393 secs
# [1] "execution time with parall"
# Time difference of 9.425545 secs
警告讯息:
1: closing unused connection 34 (<-localhost127.0.0.1:11721)
2: closing unused connection 33 (<-localhost127.0.0.1:11721)
3: closing unused connection 32 (<-localhost127.0.0.1:11721)
4: closing unused connection 31 (<-localhost127.0.0.1:11721)