Question

我有下面这段代码，但不知道该如何优化；我觉得这已经是我能写出的最佳性能了。有没有办法优化这种对列表（list）逐个取元素的操作？

require(dplyr); require(rgeos); require(sp)
# Simulate random polygons as matrices of vertex coordinates.
#
# Args:
#   objects: number of polygons to generate.
#   vertex:  number of vertices per polygon.
#
# Returns a list of length `objects`; each element is a `vertex` x 2 numeric
# matrix filled with runif() draws.
sim.polygons <- function(objects, vertex){
  # seq_len() gives an empty sequence for objects == 0, whereas 1:objects
  # would count down over c(1, 0) and fail on the zero index. lapply() also
  # builds the list in one go instead of growing it from NULL.
  lapply(seq_len(objects), function(i) matrix(runif(vertex * 2), ncol = 2))
}

# Compute the pairwise intersection areas between two sets of polygons.
#
# Args:
#   lista1, lista2: lists of polygons; every element is coerced with
#     as(x, "gpc.poly").
#   progress: if TRUE, print the index of each finished element of lista1.
#
# Returns a length(lista2) x length(lista1) numeric matrix whose [l, k] entry
# is area.poly(intersect(lista1[[k]], lista2[[l]])).
test <- function(lista1, lista2, progress = FALSE){
  lista1 <- lapply(lista1, as, Class = "gpc.poly")
  lista2 <- lapply(lista2, as, Class = "gpc.poly")
  res <- matrix(0, nrow = length(lista2), ncol = length(lista1))
  # seq_along() skips the loops for empty inputs; 1:length(x) would iterate
  # over c(1, 0) and index out of bounds.
  for (k in seq_along(lista1)) {
    poly_k <- lista1[[k]]  # constant across the inner loop, so look it up once
    for (l in seq_along(lista2)) {
      res[l, k] <- area.poly(intersect(poly_k, lista2[[l]])) #very slow
    }
    if (isTRUE(progress)) print(k)
  }
  res
}
# Example
a <- sim.polygons(50, 3) # in my problem, objects = 144 and vertex = 3
b <- sim.polygons(100, 3) # objects = 114^2 and vertex = 3

# Spell out TRUE: the shorthand T is an ordinary variable that can be reassigned.
test(a, b, progress = TRUE)

Answer 1

使用 foreach 和 doParallel 包可以很容易地把这个问题并行化。在下面的示例中，我按照您在示例注释中描述的问题规模，将您的 for 循环与 foreach 循环进行比较。

require(dplyr); require(rgeos); require(sp)
# Simulate random polygons as matrices of vertex coordinates.
#
# Args:
#   objects: number of polygons to generate.
#   vertex:  number of vertices per polygon.
#
# Returns a list of length `objects`; each element is a `vertex` x 2 numeric
# matrix filled with runif() draws.
sim.polygons <- function(objects, vertex){
  # seq_len() gives an empty sequence for objects == 0, whereas 1:objects
  # would count down over c(1, 0) and fail on the zero index. lapply() also
  # builds the list in one go instead of growing it from NULL.
  lapply(seq_len(objects), function(i) matrix(runif(vertex * 2), ncol = 2))
}

# Compute the pairwise intersection areas between two sets of polygons
# (sequential reference implementation).
#
# Args:
#   lista1, lista2: lists of polygons; every element is coerced with
#     as(x, "gpc.poly").
#   progress: if TRUE, print the index of each finished element of lista1.
#
# Returns a length(lista2) x length(lista1) numeric matrix whose [l, k] entry
# is area.poly(intersect(lista1[[k]], lista2[[l]])).
test <- function(lista1, lista2, progress = FALSE){
  lista1 <- lapply(lista1, as, Class = "gpc.poly")
  lista2 <- lapply(lista2, as, Class = "gpc.poly")
  res <- matrix(0, nrow = length(lista2), ncol = length(lista1))
  # seq_along() skips the loops for empty inputs; 1:length(x) would iterate
  # over c(1, 0) and index out of bounds.
  for (k in seq_along(lista1)) {
    poly_k <- lista1[[k]]  # constant across the inner loop, so look it up once
    for (l in seq_along(lista2)) {
      res[l, k] <- area.poly(intersect(poly_k, lista2[[l]])) #very slow
    }
    if (isTRUE(progress)) print(k)
  }
  res
}

a <- sim.polygons(144, 3) # in my problem, objects = 144 and vertex = 3
b <- sim.polygons(114, 3) # 114 objects here; the real problem has 114^2

# Time the sequential version. TRUE is spelled out because the shorthand T is
# an ordinary variable that can be reassigned.
system.time(res <- test(a, b, progress = TRUE))
user  system elapsed 
34.66    0.02   34.67 

library(foreach)
library(doParallel)
# Start a 6-worker cluster and register it as the foreach backend.
cl <- makeCluster(6)
registerDoParallel(cl)
getDoParWorkers() #6

# Load rgeos on every worker. clusterEvalQ() evaluates the expression once on
# each node of `cl`, so this does not repeat the worker count or rely on how
# foreach happens to distribute tasks across workers.
clusterEvalQ(cl, library(rgeos))

# Parallel counterpart of test(): pairwise intersection areas computed with
# nested foreach loops on the registered parallel backend.
#
# Args:
#   lista1, lista2: lists of polygons; every element is coerced with
#     as(x, "gpc.poly").
#   progress: accepted so the signature matches test(); not used here.
#
# Returns a length(lista2) x length(lista1) numeric matrix whose [l, k] entry
# is area.poly(intersect(lista1[[k]], lista2[[l]])).
test.par <- function(lista1, lista2, progress = FALSE){
  n1 <- length(lista1)
  n2 <- length(lista2)
  # Nothing to intersect: return the empty matrix directly instead of
  # iterating over 1:0.
  if (n1 == 0L || n2 == 0L) {
    return(matrix(0, nrow = n2, ncol = n1))
  }
  lista1 <- lapply(lista1, as, Class = "gpc.poly")
  lista2 <- lapply(lista2, as, Class = "gpc.poly")
  # The outer loop produces one column per lista1 element, the inner loop the
  # entries of that column. .packages asks foreach to load rgeos on the
  # workers, so the call does not depend on earlier worker setup.
  res <- foreach(k = seq_len(n1), .combine = "cbind", .packages = "rgeos") %:%
    foreach(l = seq_len(n2), .combine = "c") %dopar% #not as slow
      area.poly(intersect(lista1[[k]], lista2[[l]]))
  # foreach may hand back a plain vector (e.g. when there is only one column
  # to combine); restore the documented matrix shape in that case.
  if (!is.matrix(res)) {
    res <- matrix(res, nrow = n2, ncol = n1)
  }
  # Return the value visibly; ending on an assignment would return invisibly.
  res
}


# Time the parallel version.
system.time(res.par <- test.par(a, b, progress = TRUE))
user  system elapsed 
7.97    0.46   15.51 

dim(res)
[1] 114 144

dim(res.par)
[1] 114 144

sum(rowSums(res-res.par))
[1] 0

# Shut the worker processes down once the parallel work is finished.
stopCluster(cl)

在 6 个核心上运行时，这种实现把计算时间减少了大约一半（elapsed 从 34.67 秒降到 15.51 秒）。您的结果可能会因核心数量不同而有所不同。对循环本身做更巧妙的编程，可能还会带来进一步的提升。

优化大型列表（list）

1 个答案: