这是 this question 的后续问题。我想知道在这个问题中如何使用 .SD
(而不是像下面这样,对每个变量(此处为 Y1 和 Y2)分别进行计算)。
library(data.table)

# Reproducible example data: factor A with 2 levels, factor B with 3 levels,
# two rows per A:B combination, and two numeric responses Y1 and Y2.
set.seed(12345)
A <- rep(x = paste0("A", 1:2), each = 6)
B <- rep(x = paste0("B", 1:3), each = 2, times = 2)
# 12 replicate labels, one per row, so that this column has the same length
# as the others instead of relying on a shorter vector being recycled.
Rep <- rep(x = 1:2, times = 6)
Y1 <- rnorm(n = 12, mean = 50, sd = 5)
Y2 <- rnorm(n = 12, mean = 50, sd = 10)
dt <- data.table(A, B, Rep, Y1, Y2)

# Eff1 = (A:B cell mean of Y1) - (A-level mean) - (B-level mean)
#        + (overall mean). Each statement updates dt by reference via `:=`.
dt[, Eff1 := mean(Y1), by = .(A, B)]
dt[, Eff1 := Eff1 - mean(Y1), by = .(A)]
dt[, Eff1 := Eff1 - mean(Y1), by = .(B)]
dt[, Eff1 := Eff1 + mean(Y1)]

# Eff2: the same four steps written out again for Y2.
dt[, Eff2 := mean(Y2), by = .(A, B)]
dt[, Eff2 := Eff2 - mean(Y2), by = .(A)]
dt[, Eff2 := Eff2 - mean(Y2), by = .(B)]
dt[, Eff2 := Eff2 + mean(Y2)]

# Summarise to one row per A:B combination.
dt[, .(Eff1 = mean(Eff1), Eff2 = mean(Eff2)), by = .(A, B)]
答案 0 :(得分:2)
就我个人而言,我会考虑使用 ave:
# Response columns and the names of the effect columns derived from them.
my_cols <- c("Y1", "Y2")
tmp_cols <- c("Eff1", "Eff2")

# For each response column: overall mean + A:B group mean - A group mean
# - B group mean. ave() returns the group mean repeated for every row of
# its group, so all four terms line up row by row.
dt[, (tmp_cols) := lapply(.SD, function(y) {
  overall <- mean(y)
  cell_avg <- ave(y, A, B)
  a_avg <- ave(y, A)
  b_avg <- ave(y, B)
  overall + cell_avg - a_avg - b_avg
}), .SDcols = my_cols]

# Average the effect columns within each A:B combination.
dt[, lapply(.SD, mean), by = .(A, B), .SDcols = tmp_cols]
另外还有一种比较繁琐的写法:
# Lookup tables holding the mean of every response column per level of A,
# per level of B, and per A:B combination.
dtA <- dt[, lapply(.SD, mean), .SDcols = my_cols, by = A]
dtB <- dt[, lapply(.SD, mean), .SDcols = my_cols, by = B]
dtAB <- dt[, lapply(.SD, mean), .SDcols = my_cols, by = .(A, B)]

# Initialise every effect column with the overall mean of its response.
dt[, (tmp_cols) := lapply(.SD, mean), .SDcols = my_cols]

# Update joins: the "i."-prefixed names refer to the columns of the lookup
# table being joined. Add the A:B means, then subtract the A and B means.
dt[
  dtAB,
  (tmp_cols) := Map(`+`, mget(tmp_cols), mget(paste0("i.", my_cols))),
  on = c("A", "B")
]
dt[
  dtA,
  (tmp_cols) := Map(`-`, mget(tmp_cols), mget(paste0("i.", my_cols))),
  on = "A"
]
dt[
  dtB,
  (tmp_cols) := Map(`-`, mget(tmp_cols), mget(paste0("i.", my_cols))),
  on = "B"
]

# Average the effect columns within each A:B combination.
dt[, lapply(.SD, mean), .SDcols = tmp_cols, by = .(A, B)]