Question

我是R的新手，之前从未尝试过并行计算。我正在使用以下函数来查找完美数字（我知道这不是找到完美数字的最有效方法，因为使用Mersanne Primes要快得多）：

factorlist <- function(n) {
  if(n < 1) {return("Invalid input: n must be a natural number")}
  if(n%%1 != 0) {return("Invalid input: n must be a natural number")}
  div <- 0
  if(n%%2 != 0) {
    for(i in seq(1, n-1, by = 2)) {
      if(n%%i == 0) {
        div[length(div)] <- i
        div <- c(div, 0)
      }
    }
    div <- div[-length(div)]
    return(div)
  }
  if(n%%2 == 0) {
    for(i in seq(1, n-1)) {
      if(n%%i == 0) {
        div[length(div)] <- i
        div <- c(div, 0)
      }
    }
    div <- div[-length(div)]
    return(div)
  }
}

perfectcheckN <- function(n) {
  if(n < 1) {return("Invalid input: n must be a natural number")}
  if(n%%1 != 0) {return("Invalid input: n must be a natural number")}
  start.time <- Sys.time()
  perf <- 0
  for(i in 6:n) {
    div <- factorlist(i)
    num <- 0
    for(j in 1:length(div)) {
      num <- num + div[j]
    }
    if(num == i) {
      perf[length(perf)] <- num
      perf <- c(perf, 0)
    }
  }
  perf <- perf[-length(perf)]
  end.time <- Sys.time()
  time.taken <- end.time - start.time
  return(cat(perf, "Time taken:", time.taken, "seconds."))
}

我的CPU有12个线程，运行此代码通常使用单个线程，并且需要很长时间才能使用大量输入。有没有办法并行化代码，使其运行更快？提前致谢

Answer 1

初步方法

一个没有太多代码更改的简单方法是使用parallel::mclapply 假设您不在Windows上（如果您查看parallelsugar::mclapply的Windows实现）。然而，这并不理想，因为它预先将数字调度到每个线程的桶中。例如，如果我们在4核计算机上的数字为40000，那么存储桶将类似于：

第一个6:10000
第二个10001:20000
第三个20001:30000
第四个30001:40000

前3个线程将在最后一个之前完成。但是，这比原来的要快。使用我的 4核非超线程英特尔i5 3570K的单次运行给出了以下结果：

perfectcheckN(10000)

mclapply version: 6 28 496 8128 Time taken: 5.015653 seconds.
original version: 6 28 496 8128 Time taken: 14.84113 seconds.
speedup: 2.96x

代码：

library(parallel)

factorlist <- function(n) {
  if(n < 1) {return("Invalid input: n must be a natural number")}
  if(n%%1 != 0) {return("Invalid input: n must be a natural number")}
  div <- 0
  if(n%%2 != 0) {
    for(i in seq(1, n-1, by = 2)) {
      if(n%%i == 0) {
        div[length(div)] <- i
        div <- c(div, 0)
      }
    }
    div <- div[-length(div)]
    return(div)
  }
  if(n%%2 == 0) {
    for(i in seq(1, n-1)) {
      if(n%%i == 0) {
        div[length(div)] <- i
        div <- c(div, 0)
      }
    }
    div <- div[-length(div)]
    return(div)
  }
}

perfectcheckN <- function(n) {
  if(n < 1) {return("Invalid input: n must be a natural number")}
  if(n%%1 != 0) {return("Invalid input: n must be a natural number")}
  start.time <- Sys.time()

  perf <- mclapply(6:n, mc.cores = detectCores(), FUN = function(i) {
    div <- factorlist(i)
    num <- 0
    for(j in 1:length(div)) {
      num <- num + div[j]
    }
    if(num == i) {
      return(num)
    }
    return(NA)
  })
  perf <- t(as.data.frame(perf[!is.na(perf)]))
  perf <- c(perf, 0)
  perf <- perf[-length(perf)]
  end.time <- Sys.time()
  time.taken <- end.time - start.time
  return(cat(perf, "Time taken:", time.taken, "seconds."))
}

更好的方法

mclapply有一个mc.preschedule选项，我们可以将其设置为F但这无济于事，因为它会为每个线程（而不是范围）执行每个整数。但是我们可以随机抽样6:n让所有线程运行大约相同的时间（我们需要对perf进行排序以获得相同的顺序）。无论如何，这将有很大帮助。除此之外，我会考虑优化你编写R代码的方式，但这不是你问的问题（但它也会加快速度）。

perfectcheckN(10000)

mclapply version: 6 28 496 8128 Time taken: 4.393636 seconds.
original version: 6 28 496 8128 Time taken: 14.84113 seconds.
speedup: 3.38x

更新代码：

library(parallel)

factorlist <- function(n) {
  if(n < 1) {return("Invalid input: n must be a natural number")}
  if(n%%1 != 0) {return("Invalid input: n must be a natural number")}
  div <- 0
  if(n%%2 != 0) {
    for(i in seq(1, n-1, by = 2)) {
      if(n%%i == 0) {
        div[length(div)] <- i
        div <- c(div, 0)
      }
    }
    div <- div[-length(div)]
    return(div)
  }
  if(n%%2 == 0) {
    for(i in seq(1, n-1)) {
      if(n%%i == 0) {
        div[length(div)] <- i
        div <- c(div, 0)
      }
    }
    div <- div[-length(div)]
    return(div)
  }
}

perfectcheckN <- function(n) {
  if(n < 1) {return("Invalid input: n must be a natural number")}
  if(n%%1 != 0) {return("Invalid input: n must be a natural number")}
  start.time <- Sys.time()

  perf <- mclapply(sample(6:n), mc.cores = detectCores(), FUN = function(i) {
    div <- factorlist(i)
    num <- 0
    for(j in 1:length(div)) {
      num <- num + div[j]
    }
    if(num == i) {
      return(num)
    }
    return(NA)
  })
  perf <- t(as.data.frame((perf[!is.na(perf)])))
  perf <- c(perf, 0)
  perf <- perf[-length(perf)]
  end.time <- Sys.time()
  time.taken <- end.time - start.time
  return(cat(sort(perf), "Time taken:", time.taken, "seconds."))
}

在R中并行化函数

1 个答案:

初步方法

更好的方法