我正在为ggtern包进行一些开发,我正在努力制作一个有效的算法来处理三元热图。具体来说,我使用以下帖子(Ternary Heatmap)作为起点。
考虑以下功能,该功能基于上述链接(部分):
# Produce Points for Triangular Mesh
triMesh = function( n = 1){
n = max(as.integer(n[1]),1)
p = data.frame()
cnt = 0
inc = 1.0/n
stp = seq(0,1,length.out = n + 1 )
for (ix in seq_along(stp)){
z <- stp[ix]
y <- 1 - z
x <- 0
while ( y >= 0 ) {
p <- rbind(p, c(cnt, x, y, z))
y <- y - inc #Step x down
x <- x + inc #Step y up
cnt <- cnt + 1 #Increment Count
}
}
colnames(p) = c("IDPoint","x","y","z")
p = round(p[with(p,order(y,x,-z)),],6)
rownames(p) = 1:nrow(p) - 1
p
}
这是我的版本,在语法上更简洁:
# Produce Points for Triangular Mesh
triMesh2 = function( n = 1 ){
n = as.integer(max(n[1],1))
#Nested plyr calls
result = ldply(0:n,function(y){ ##OUTER
ldply(0:(n-y),function(x){ ##INNER
data.frame(x,y,z = n -x -y) ##DIFF
})
})
result = data.frame( 1:nrow(result)-1,result/n)
names(result) = c('IDPoint','x','y','z')
result
}
现在,使用microbenchmark,第一种算法的速度提高了几倍:
> microbenchmark(triMesh(10))
Unit: milliseconds
expr min lq mean median uq max neval
triMesh(10) 6.447525 6.91798 8.432698 7.334905 8.727805 23.37242 100
> microbenchmark(triMesh2(10))
Unit: milliseconds
expr min lq mean median uq max neval
triMesh2(10) 27.26659 29.34891 32.50808 31.43524 34.92925 51.8585 100
>
我想知道是否有人可以将第二种算法的性能提高到第一种(或更好)附近的某些东西......
干杯
答案 0 :(得分:2)
通常,简单地使用矢量会更快:
triMesh3 <- function(n = 1){
n <- as.integer(max(n[1], 1))
result <- lapply(0:n, function(y){
l <- lapply(0:(n - y), function(x){
c(x = x, y = y, z = n - x - y)
})
Reduce(rbind, l)
})
result <- Reduce(rbind, result)
row.names(result) <- NULL
result <- cbind(1:nrow(result) - 1, result/n)
result <- as.data.frame(result)
names(result) <- c('IDPoint', 'x', 'y', 'z')
result
}
all.equal(triMesh3(12), triMesh2(12))
# [1] TRUE
microbenchmark::microbenchmark(triMesh3(10),
triMesh2(10),
triMesh(10), times = 100, unit = "relative")
# Unit: relative
# expr min lq mean median uq max neval cld
# triMesh3(10) 1.00000 1.00000 1.00000 1.00000 1.00000 1.00000 100 a
# triMesh2(10) 92.16829 89.07131 86.66111 88.32173 85.68915 63.29785 100 c
# triMesh(10) 30.60108 29.70537 29.61635 29.83430 30.11924 32.40393 100 b
@CPak你打败了我一点,我也想更新我的答案:
triMesh4_minem <- function(n = 1){
n <- as.integer(max(n[1], 1))
y1 <- 0:n
ys <- n - y1 + 1
y <- sapply(1:(n + 1), function(x) y1[1:ys[x]])
y <- unlist(y)
x <- rep(y1, times = ys)
result2 <- cbind(1:(length(x)) - 1, y/n, x/n, (n - y - x)/n)
result <- as.data.frame.matrix(result2)
names(result) <- c('IDPoint', 'x', 'y', 'z')
result
}
all.equal(triMesh4_minem(2), triMesh4_cpack(2))
# [1] TRUE
microbenchmark::microbenchmark(triMesh4_minem(1e4),
triMesh4_cpack(1e4),
times = 10, unit = "relative")
# Unit: relative
# expr min lq mean median uq max neval cld
# triMesh4_minem(10000) 2.659507 2.572209 2.121967 1.965973 1.906203 1.905907 10 b
# triMesh4_cpack(10000) 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 10 a
答案 1 :(得分:1)
我想提供另一种生成数据的方法。 Reduce(...)
的问题在于它通常无法随着增加N
triMesh4 <- function(n=1) {
n <- as.integer(max(n[1], 1))
temp <- seq(0, n, 1)
df <- data.frame(
x = unlist(sapply((n+1):1, function(i) temp[1:i])),
y = rep(0:n, (n+1):1)
)
df$z <- n - df$x - df$y
df <- cbind(0:(nrow(df)-1), df / n)
names(df) <- c('IDPoint', 'x', 'y', 'z')
return(df)
}
all.equal(triMesh3(12), triMesh4(12))
# [1] TRUE
library(microbenchmark)
N <- c(12, 16, 100)
lapply(N, function(i) microbenchmark(triMesh3(i), triMesh4(i), times=10L, unit="relative"))
# [[1]]
# Unit: relative
# expr min lq mean median uq max neval
# triMesh3(i) 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 10
# triMesh4(i) 1.484984 1.472767 1.466758 1.474142 1.470987 1.392629 10
# [[2]]
# Unit: relative
# expr min lq mean median uq max neval
# triMesh3(i) 1.075225 1.081014 1.017441 1.024083 1.015504 0.8398393 10
# triMesh4(i) 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000 10
# [[3]]
# Unit: relative
# expr min lq mean median uq max neval
# triMesh3(i) 23.67992 23.33367 22.79632 23.2149 21.89245 21.32084 10
# triMesh4(i) 1.00000 1.00000 1.00000 1.0000 1.00000 1.00000 10