# For every value in `a`, locate the last position in `b` and in `c` whose
# value is strictly smaller, then look up the values at those positions.
a <- seq(from = 1, to = 50000, by = 1)
b <- seq(from = 1, to = 10000, by = 2)
c <- seq(from = 1, to = 10000, by = 3)
# Pre-fill with integer NA: positions without a match need no explicit
# assignment, and an integer NA index (unlike a logical NA) is never recycled
# when used to subset `b` / `c` below.
two <- rep(NA_integer_, length(a))
three <- rep(NA_integer_, length(a))
system.time(
  for (i in seq_along(a)) {
    # Compute each set of matching positions once per iteration.
    below_b <- which(a[i] > b)
    below_c <- which(a[i] > c)
    # Scalar condition, so use the short-circuiting `&&`.
    if (length(below_b) > 0 && length(below_c) > 0) {
      two[i] <- below_b[length(below_b)]
      three[i] <- below_c[length(below_c)]
    }
  }
)
build_b <- b[two]
build_c <- c[three]
我要做的是找出在 `a` 的每个时间点上 `b` 和 `c` 各自的样子。为此,我预先为向量 `two` 和 `three` 分配了内存,一方面是想节省一些时间,另一方面也便于记录这些事件的索引。循环完成后,我根据刚刚计算出的索引构建新的向量。目前,该操作大约需要 10 秒才能算完。我的问题是:如何加快这项操作?
谢谢!
答案 0 :(得分:3)
以下是使用 `findInterval` 的另一种解决方案:
## assume b and c are sorted in non-decreasing order (required by
## findInterval); a may be in any order
## left.open = TRUE counts the elements strictly smaller than each value of a,
## which matches the `a[i] > b` test and also works for non-integer data.
two <- findInterval(a, b, left.open = TRUE)
three <- findInterval(a, c, left.open = TRUE)
## As in the loop version, a position is NA in both results when either
## vector has no smaller element.
unmatched <- two == 0L | three == 0L
two[unmatched] <- NA
three[unmatched] <- NA
build_b <- b[two]
build_c <- c[three]
下面是一个简单的基准测试:
# Benchmark inputs: `a` holds the values to look up; `b` and `c` are the
# ascending vectors that are searched.
a <- seq(1, 50000, by = 1)
b <- seq(1, 10000, by = 2)
c <- seq(1, 10000, by = 3)
#' Loop-based lookup using `which()` (baseline implementation)
#'
#' For each element of `a`, finds the last position in `b` and the last
#' position in `c` whose value is strictly smaller than that element, and
#' returns the values of `b` and `c` at those positions. When either vector
#' has no smaller value, both results are `NA` for that element.
#'
#' @param a Vector of values to look up.
#' @param b,c Vectors that are searched.
#' @return A list with elements `b` and `c`, each the same length as `a`.
pops <- function(a, b, c) {
  # Integer NA, not logical NA: if no element ever matched, a logical NA index
  # would be recycled by `[` and return length(b) values instead of length(a).
  two <- rep(NA_integer_, length(a))
  three <- rep(NA_integer_, length(a))
  for (i in seq_along(a)) {
    # Compute each set of matching positions once per iteration.
    below_b <- which(a[i] > b)
    below_c <- which(a[i] > c)
    # Scalar condition, so use the short-circuiting `&&`.
    if (length(below_b) > 0 && length(below_c) > 0) {
      two[i] <- below_b[length(below_b)]
      three[i] <- below_c[length(below_c)]
    }
  }
  list(b = b[two], c = c[three])
}
#' Loop-based lookup using `sum()` over a logical mask
#'
#' For each element of `a`, counts how many values of `b` and of `c` are
#' strictly smaller and uses those counts as positions into `b` and `c`. The
#' count equals the position of the last smaller value only when `b` and `c`
#' are sorted in ascending order. When either vector has no smaller value,
#' both results are `NA` for that element.
#'
#' @param a Vector of values to look up.
#' @param b,c Vectors that are searched; assumed sorted ascending.
#' @return A list with elements `b` and `c`, each the same length as `a`.
droopy <- function(a, b, c) {
  # Integer NA, not logical NA: if no element ever matched, a logical NA index
  # would be recycled by `[` and return length(b) values instead of length(a).
  two <- rep(NA_integer_, length(a))
  three <- rep(NA_integer_, length(a))
  for (i in seq_along(a)) {
    smaller_b <- a[i] > b
    smaller_c <- a[i] > c
    # Scalar condition, so use the short-circuiting `&&`.
    if (any(smaller_b) && any(smaller_c)) {
      two[i] <- sum(smaller_b)
      three[i] <- sum(smaller_c)
    }
  }
  list(b = b[two], c = c[three])
}
#' Vectorised lookup using `findInterval()`
#'
#' For each element of `a`, returns the largest value of `b` and of `c` that
#' is strictly smaller than that element. When either vector has no smaller
#' value, both results are `NA` for that element.
#'
#' @param a Vector of values to look up; may be in any order.
#' @param b,c Vectors that are searched; must be sorted in non-decreasing
#'   order, as `findInterval()` requires.
#' @return A list with elements `b` and `c`, each the same length as `a`.
sgibb <- function(a, b, c) {
  # left.open = TRUE counts the elements strictly smaller than each value of
  # `a`; unlike shifting `a` by one, this also works for non-integer data.
  two <- findInterval(a, b, left.open = TRUE)
  three <- findInterval(a, c, left.open = TRUE)
  # Match the loop-based versions: NA in both results if either has no match.
  unmatched <- two == 0L | three == 0L
  two[unmatched] <- NA
  three[unmatched] <- NA
  list(b = b[two], c = c[three])
}
基准测试结果:
library(rbenchmark)

# Time the three implementations on the same inputs, fastest first.
benchmark(
  pops(a, b, c),
  droopy(a, b, c),
  sgibb(a, b, c),
  order = "relative",
  replications = 2
)
# Output recorded from the original run (timings vary by machine):
# test replications elapsed relative user.self sys.self user.child sys.child
#3 sgibb(a, b, c) 2 0.010 1.0 0.008 0.004 0 0
#2 droopy(a, b, c) 2 8.639 863.9 8.613 0.000 0 0
#1 pops(a, b, c) 2 26.838 2683.8 26.753 0.004 0 0

# Check that the implementations return the same result on these inputs.
identical(pops(a, b, c), sgibb(a, b, c))
## TRUE
identical(droopy(a, b, c), sgibb(a, b, c))
## TRUE
答案 1 :(得分:1)
一种可能性:
# Inputs: values to look up (`a`) and the two ascending vectors to search.
a <- seq(1, 50000, by = 1)
b <- seq(1, 10000, by = 2)
c <- seq(1, 10000, by = 3)
# Start from NA so that only the matching positions need to be filled in.
two <- rep(NA_integer_, length(a))
three <- rep(NA_integer_, length(a))
system.time({
  for (i in seq_along(a)) {
    smaller_b <- a[i] > b
    smaller_c <- a[i] > c
    # The number of smaller elements is the position of the last smaller one,
    # because `b` and `c` are ascending.
    if (any(smaller_b) && any(smaller_c)) {
      two[i] <- sum(smaller_b)
      three[i] <- sum(smaller_c)
    }
  }
})
build_b <- b[two]
build_c <- c[three]