如何快速比较所有成对的数据行?下面是一个最小可运行示例(MWE),但我的实际数据有近 10,000 行(即 amount.of.baskets),这段代码运行得太慢了。
该 MWE 计算从一个篮子变为另一个篮子时,每种水果需要增加或减少的数量。
library(data.table)
# Reproducible toy data: one row per basket, one column per fruit, with each
# count drawn with replacement from 0..9.
set.seed(123)
amount.of.baskets <- 10
# lapply() visits the fruit names in order, so the random draws are consumed
# column by column in the order the names are listed.
the.baskets <- as.data.table(lapply(
  setNames(nm = c(
    "apples", "oranges", "pears", "bananas", "pineapples",
    "avocados", "nectarines", "limes", "cherries", "melons"
  )),
  function(fruit) sample(0:9, size = amount.of.baskets, replace = TRUE)
))
# Baseline (loop) version: for every ordered pair (from.i, to.i) of baskets,
# compute how many of each fruit must be added (positive) or removed
# (negative) to turn basket `from.i` into basket `to.i`.
#
# Each one-row result is stored in a preallocated list and everything is bound
# once at the end; calling rbind() on the growing table inside the loop would
# re-copy all previously collected rows on every iteration.
pair.rows <- vector("list", amount.of.baskets^2)
# seq_len() gives an empty loop for zero baskets, whereas 1:0 counts down.
for (from.i in seq_len(amount.of.baskets)) {
  for (to.i in seq_len(amount.of.baskets)) {
    # Stack the negated source basket on the target basket so that the column
    # sums equal (target - source) for every fruit.
    tmp.i <- rbind(((-1) * the.baskets[from.i, ]), the.baskets[to.i, ])
    tmp.sum <- data.table(t(colSums(tmp.i)))
    tmp.sum[, c("from.basket", "to.basket") := list(from.i, to.i)]
    # Slot index keeps the rows ordered by from.i first, then to.i.
    pair.rows[[(from.i - 1) * amount.of.baskets + to.i]] <- tmp.sum
  }
}
basket.diff.table <- rbindlist(pair.rows)
`basket.diff.table` 即为所需的输出。
答案 0 :(得分:3)
要将一个函数应用于两个向量的所有元素组合,通常可以使用 `outer`。
# For each fruit column, compute every pairwise difference between baskets and
# flatten the resulting n x n matrix (column-major) into one vector.
outer_diffs <- lapply(the.baskets, function(fruit_counts) {
  diff_matrix <- outer(fruit_counts, fruit_counts, FUN = "-")
  as.vector(diff_matrix)
})
# Inspect the result: a named list with one flattened vector per fruit.
str(outer_diffs)
# List of 10
# $ apples : int [1:100] 0 5 2 6 7 -2 3 6 3 2 ...
# $ oranges : int [1:100] 0 -5 -3 -4 -8 -1 -7 -9 -6 0 ...
# $ pears : int [1:100] 0 -2 -2 1 -2 -1 -3 -3 -6 -7 ...
# $ bananas : int [1:100] 0 0 -3 -2 -9 -5 -2 -7 -6 -7 ...
# $ pineapples: int [1:100] 0 3 3 2 0 0 1 3 1 7 ...
# $ avocados : int [1:100] 0 4 7 1 5 2 1 7 8 3 ...
# $ nectarines: int [1:100] 0 -6 -3 -4 2 -2 2 2 1 -2 ...
# $ limes : int [1:100] 0 -1 0 -7 -3 -5 -4 -1 -4 -6 ...
# $ cherries : int [1:100] 0 4 2 5 -1 2 7 6 6 -1 ...
# $ melons : int [1:100] 0 5 2 5 2 0 6 -1 3 4 ...
# Turn the list of flattened difference vectors into a table, one column per
# fruit and one row per ordered (from, to) basket pair.
basket.diff.table <- as.data.table(outer_diffs)
basket_indices <- seq_len(nrow(the.baskets))
# Label each row with its pair. The flattened outer() matrix is column-major,
# so the subtracted ("from") basket changes slowest and the "to" basket
# changes fastest.
basket.diff.table[
  ,
  c("from.basket", "to.basket") := list(
    rep(basket_indices, each = nrow(the.baskets)),
    rep(basket_indices, times = nrow(the.baskets))
  )
]
# Preview the first five pairs.
basket.diff.table[1:5]
# apples oranges pears bananas pineapples avocados nectarines limes cherries melons from.basket to.basket
# 1: 0 0 0 0 0 0 0 0 0 0 1 1
# 2: 5 -5 -2 0 3 4 -6 -1 4 5 1 2
# 3: 2 -3 -2 -3 3 7 -3 0 2 2 1 3
# 4: 6 -4 1 -2 2 1 -4 -7 5 5 1 4
# 5: 7 -8 -2 -9 0 5 2 -3 -1 2 1 5
**更新**
我保留上面的内容作为我的答案,因为它更像是对解决方案的逐步演示。不过,正如 @Henrik 指出的那样,如果把更多操作交给 data.table 包来完成,这段代码可以写得更短、运行得更快。他的版本如下:
# Compact data.table version: build the per-fruit difference columns and the
# (from, to) pair index inside a single `j` expression.
n <- nrow(the.baskets)
basket.diff.table2 <- the.baskets[, c(
  # One column per fruit holding all n^2 pairwise differences; the outer()
  # matrix is flattened column-major, i.e. "to" minus "from" with "to"
  # varying fastest.
  lapply(.SD, function(x) as.vector(outer(x, x, "-"))),
  # Every (from, to) combination, in the same row order as the flattened
  # differences. seq_len() stays empty for an empty table, whereas 1:0 would
  # expand to c(1, 0) and produce bogus pairs.
  CJ(from.basket = seq_len(n), to.basket = seq_len(n))
)]