我想找到向向量追加元素最快、最有效的方法。我测试了5种不同的追加方法,发现最快的方法就是使用预分配(参见 append3 函数):
# using generic combine
#
# Builds the vector 1, 2, ..., n by growing it one element at a time with
# c(). Every iteration creates a new vector from the old one plus `i`; this
# is the strategy being benchmarked, so it is kept on purpose.
#
# n: non-negative number of elements to append.
# Returns a double vector of length n (empty when n is 0).
append1 <- function(n) {
  v <- numeric()
  # seq_len(n) is empty for n == 0, whereas 1:n would iterate over c(1, 0).
  for (i in seq_len(n)) {
    v <- c(v, i)
  }
  v
}
# using length and auto extending
#
# Builds the vector 1, 2, ..., n by assigning to the position just past the
# current end of the vector, which makes R extend it by one element.
#
# n: non-negative number of elements to append.
# Returns a double vector of length n (empty when n is 0).
append2 <- function(n) {
  v <- numeric()
  # seq_len(n) is empty for n == 0, whereas 1:n would iterate over c(1, 0).
  for (i in seq_len(n)) {
    v[length(v) + 1] <- i
  }
  v
}
# using preallocation
#
# Builds the vector 1, 2, ..., n by allocating all n slots up front and then
# filling them in place, so the vector never has to grow inside the loop.
#
# n: non-negative number of elements to store.
# Returns a double vector of length n (empty when n is 0).
append3 <- function(n) {
  v <- numeric(n)
  # seq_len(n) is empty for n == 0; with 1:n the loop would run for i = 1
  # and extend the empty vector to length 1.
  for (i in seq_len(n)) {
    v[i] <- i
  }
  v
}
# using append
#
# Builds the vector 1, 2, ..., n by calling append() once per element and
# reassigning the result.
#
# n: non-negative number of elements to append.
# Returns a double vector of length n (empty when n is 0).
append4 <- function(n) {
  v <- numeric()
  # seq_len(n) is empty for n == 0, whereas 1:n would iterate over c(1, 0).
  for (i in seq_len(n)) {
    v <- append(v, i)
  }
  v
}
# using union
#
# Builds the vector 1, 2, ..., n by taking the set union of the accumulated
# vector and the next value on every iteration. The loop values are all
# distinct, so union() never drops anything here.
#
# n: non-negative number of elements to append.
# Returns a double vector of length n (empty when n is 0).
append5 <- function(n) {
  v <- numeric()
  # seq_len(n) is empty for n == 0, whereas 1:n would iterate over c(1, 0).
  for (i in seq_len(n)) {
    v <- union(v, i)
  }
  v
}
library(microbenchmark)
# Time the five append strategies (append1 .. append5), each building a
# 10000-element vector; times = 5 asks microbenchmark for 5 evaluations of
# every expression.
microbenchmark(append1(10000), append2(10000), append3(10000), append4(10000), append5(10000), times = 5)
# Recorded output of the call above (one row per expression):
# Unit: milliseconds
# expr min lq median uq max neval
# append1(10000) 338.77588 341.06664 342.66819 360.11682 413.78760 5
# append2(10000) 372.71939 373.20159 375.15096 385.76314 431.04495 5
# append3(10000) 23.26534 23.27922 23.59688 23.68247 24.80935 5
# append4(10000) 373.60041 373.91250 434.95227 435.57716 440.97028 5
# append5(10000) 6382.45524 6425.84974 6445.28719 6520.39599 6572.08553 5
但预分配需要事先知道向量的最终长度。我想知道是否有另一种无需预先分配、又能快速动态追加的方法。
答案 0 :(得分:0)
我测试了按需(惰性)预分配的方法,发现在所测试的取值中,每次扩容量约为当前向量长度的5%时效果最好:
# using preallocation if necessary
#
# Builds the vector 1, 2, ..., n starting from a buffer of m slots and
# enlarging the buffer only when the next index no longer fits.
#
# n: non-negative number of elements to store.
# m: initial number of preallocated slots.
# k: growth factor; each enlargement adds k times the current buffer length
#    (at least one slot).
# Returns a double vector of length n (empty when n is 0); unused buffer
# slots are trimmed off before returning.
append6 <- function(n, m, k) {
  v <- numeric(m)
  # Track the real allocated length rather than the requested m.
  len <- length(v)
  # seq_len(n) is empty for n == 0, whereas 1:n would iterate over c(1, 0).
  seqN <- seq_len(n)
  for (i in seqN) {
    if (i > len) {
      # Grow by a fraction k of the current length, but never by less than
      # one slot; plain max() replaces ifelse() on a scalar.
      v <- c(v, numeric(max(len * k, 1)))
      len <- length(v)
    }
    v[i] <- i
  }
  # Drop whatever spare capacity is left beyond the n filled positions.
  v[seqN]
}
# Compare full preallocation (append3) against append6 with an initial
# buffer of 1 slot and growth factors 0.01, 0.02, 0.03 and 0.05, each
# building a 10000-element vector; times = 5 evaluations per expression.
microbenchmark(append3(10000), append6(10000, 1, 0.01), append6(10000, 1, 0.02), append6(10000, 1, 0.03), append6(10000, 1, 0.05), times = 5)
# Recorded output of the call above:
#Unit: milliseconds
# expr min lq median uq max neval
# append3(10000) 22.63332 23.33605 23.84207 24.79069 25.14729 5
#append6(10000, 1, 0.01) 47.97180 48.02801 48.55201 56.94827 57.34071 5
#append6(10000, 1, 0.02) 39.13763 39.23551 41.39761 41.48553 42.57642 5
#append6(10000, 1, 0.03) 36.50014 37.07902 40.95009 44.17062 46.30302 5
#append6(10000, 1, 0.05) 34.67124 34.88921 35.01459 36.77056 43.55909 5
# Same comparison as for 10000 elements, repeated with 100000 elements to
# see how the growth factors behave on a vector ten times longer.
microbenchmark(append3(100000), append6(100000, 1, 0.01), append6(100000, 1, 0.02), append6(100000, 1, 0.03), append6(100000, 1, 0.05), times = 5)
# Recorded output of the call above:
#Unit: milliseconds
# expr min lq median uq max neval
# append3(1e+05) 241.0875 241.1899 241.9886 250.7469 257.6539 5
#append6(1e+05, 1, 0.01) 439.9242 496.1509 500.2590 501.2341 505.9891 5
#append6(1e+05, 1, 0.02) 381.0249 382.8471 383.3789 391.5930 460.3711 5
#append6(1e+05, 1, 0.03) 357.0127 359.9495 361.5947 371.0794 383.7376 5
#append6(1e+05, 1, 0.05) 345.7090 347.6097 349.7010 371.0359 377.4635 5
可以看出,append6 方法比 append3 方法慢约45%。这似乎是在速度与内存使用之间做出的权衡。