R三角网

时间:2018-01-18 12:05:54

标签: r optimization plyr microbenchmark

我正在为ggtern包进行一些开发,我正在努力制作一个有效的算法来处理三元热图。具体来说,我使用以下帖子(Ternary Heatmap)作为起点。

考虑以下功能,该功能基于上述链接(部分):

# Produce Points for Triangular Mesh
triMesh = function( n = 1){
  n    = max(as.integer(n[1]),1)
  p    = data.frame()
  cnt  = 0
  inc  = 1.0/n
  stp  = seq(0,1,length.out = n + 1 )

  for (ix in seq_along(stp)){
    z <- stp[ix]
    y <- 1 - z
    x <- 0
    while ( y >= 0 ) {
      p   <- rbind(p, c(cnt, x, y, z))
      y   <- y - inc #Step x down
      x   <- x + inc #Step y up
      cnt <- cnt + 1          #Increment Count
    }
  }
  colnames(p) = c("IDPoint","x","y","z")
  p = round(p[with(p,order(y,x,-z)),],6)
  rownames(p) = 1:nrow(p) - 1
  p
}

这是我的版本,在语法上更简洁:

# Produce Points for Triangular Mesh
triMesh2 = function( n = 1 ){
  n   = as.integer(max(n[1],1))

  #Nested plyr calls
  result = ldply(0:n,function(y){ ##OUTER
    ldply(0:(n-y),function(x){    ##INNER
      data.frame(x,y,z = n -x -y) ##DIFF
    })
  })

  result        = data.frame( 1:nrow(result)-1,result/n)
  names(result) = c('IDPoint','x','y','z')
  result
}

现在,使用microbenchmark,第一种算法的速度提高了几倍:

> microbenchmark(triMesh(10))
Unit: milliseconds
        expr      min      lq     mean   median       uq      max neval
 triMesh(10) 6.447525 6.91798 8.432698 7.334905 8.727805 23.37242   100

> microbenchmark(triMesh2(10))
Unit: milliseconds
         expr      min       lq     mean   median       uq     max neval
 triMesh2(10) 27.26659 29.34891 32.50808 31.43524 34.92925 51.8585   100
> 

我想知道是否有人可以将第二种算法的性能提高到第一种(或更好)附近的某些东西......

干杯

2 个答案:

答案 0 :(得分:2)

通常,简单地使用矢量会更快:

triMesh3 <- function(n = 1){
  n <- as.integer(max(n[1], 1))
  result <- lapply(0:n, function(y){ 
    l <- lapply(0:(n - y), function(x){    
      c(x = x, y = y, z = n - x - y) 
    })
    Reduce(rbind, l)
  })
  result <- Reduce(rbind, result)
  row.names(result) <- NULL
  result <- cbind(1:nrow(result) - 1, result/n)
  result <- as.data.frame(result)
  names(result) <- c('IDPoint', 'x', 'y', 'z')
  result
}

all.equal(triMesh3(12), triMesh2(12))
# [1] TRUE
microbenchmark::microbenchmark(triMesh3(10),
                               triMesh2(10), 
                               triMesh(10), times = 100, unit = "relative")
# Unit: relative
#         expr      min       lq     mean   median       uq      max neval cld
# triMesh3(10)  1.00000  1.00000  1.00000  1.00000  1.00000  1.00000   100 a  
# triMesh2(10) 92.16829 89.07131 86.66111 88.32173 85.68915 63.29785   100   c
#  triMesh(10) 30.60108 29.70537 29.61635 29.83430 30.11924 32.40393   100  b 

@CPak你打败了我一点,我也想更新我的答案:

triMesh4_minem <- function(n = 1){
  n <- as.integer(max(n[1], 1))
  y1 <- 0:n
  ys <- n - y1 + 1
  y <- sapply(1:(n + 1), function(x) y1[1:ys[x]])
  y <- unlist(y)
  x <- rep(y1, times = ys)
  result2 <- cbind(1:(length(x)) - 1, y/n, x/n, (n - y - x)/n)
  result <- as.data.frame.matrix(result2)
  names(result) <- c('IDPoint', 'x', 'y', 'z')
  result
}
all.equal(triMesh4_minem(2), triMesh4_cpack(2))
# [1] TRUE

microbenchmark::microbenchmark(triMesh4_minem(1e4),
                               triMesh4_cpack(1e4),
                               times = 10, unit = "relative")
# Unit: relative
#                 expr      min       lq     mean   median       uq      max neval cld
# triMesh4_minem(10000) 2.659507 2.572209 2.121967 1.965973 1.906203 1.905907    10   b
# triMesh4_cpack(10000) 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000    10  a 

答案 1 :(得分:1)

我想提供另一种生成数据的方法。 Reduce(...)的问题在于它通常无法随着增加N

而扩展
triMesh4 <- function(n=1) {
    n <- as.integer(max(n[1], 1))
    temp <- seq(0, n, 1)
    df <- data.frame(
                x = unlist(sapply((n+1):1, function(i) temp[1:i])),
                y = rep(0:n, (n+1):1)
            )
    df$z <- n - df$x - df$y
    df <- cbind(0:(nrow(df)-1), df / n)
    names(df) <- c('IDPoint', 'x', 'y', 'z')
    return(df)
}

all.equal(triMesh3(12), triMesh4(12))
# [1] TRUE

library(microbenchmark)
N <- c(12, 16, 100)
lapply(N, function(i) microbenchmark(triMesh3(i), triMesh4(i), times=10L, unit="relative"))
# [[1]]
# Unit: relative
        # expr      min       lq     mean   median       uq      max neval
 # triMesh3(i) 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000    10
 # triMesh4(i) 1.484984 1.472767 1.466758 1.474142 1.470987 1.392629    10

# [[2]]
# Unit: relative
        # expr      min       lq     mean   median       uq       max neval
 # triMesh3(i) 1.075225 1.081014 1.017441 1.024083 1.015504 0.8398393    10
 # triMesh4(i) 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000    10

# [[3]]
# Unit: relative
        # expr      min       lq     mean  median       uq      max neval
 # triMesh3(i) 23.67992 23.33367 22.79632 23.2149 21.89245 21.32084    10
 # triMesh4(i)  1.00000  1.00000  1.00000  1.0000  1.00000  1.00000    10