给出这个数据示例:
library(data.table)
# Example data: 20 rows with a factor `bin`, a numeric `value` and a
# character `nvar`, assembled directly as a data.table-classed list.
# NOTE: the factor levels list "c" twice, so codes 3 and 4 both display
# as "c" while still being distinct groups.
dat <- structure(
  list(
    # Level codes cycle through 1, 2, 4, 3, 5, 6 until 20 rows are filled.
    bin = structure(
      rep_len(c(1L, 2L, 4L, 3L, 5L, 6L), 20L),
      .Label = c("a", "b", "c", "c", "d", "e"),
      class = "factor"
    ),
    # One line per nvar group (Ecor, AM, SS, ACC).
    value = c(
      3, 7, 0, 1, 1, 4,
      8, 4, 8, 7, 2, 2,
      4, 6, 7, 17, 1, 2,
      4, 7
    ),
    nvar = rep(c("Ecor", "AM", "SS", "ACC"), times = c(6L, 6L, 6L, 2L))
  ),
  row.names = c(NA, -20L),
  class = c("data.table", "data.frame")
)
我想先把 nvar 重新归类(Ecor 和 ACC 合并为 cat1,AM 归为 cat2,SS 保持不变),然后按 bin 和归类后的 nvar 分组,计算 value 的平均值。
输出可以是这样的:
bin value nvar
1: a 3 cat1
2: b 7 cat1
3: c 0 cat1
4: c 1 cat1
5: d 1 cat1
6: e 4 cat1
7: a 8 cat2
8: b 4 cat2
9: c 8 cat2
10: c 7 cat2
11: d 2 cat2
12: e 2 cat2
13: a 4 SS
14: b 6 SS
15: c 7 SS
16: c 17 SS
17: d 1 SS
答案 0 :(得分:1)
您可以使用 fifelse 根据条件对 nvar 列重新编码(就地覆盖原列),
然后按 bin 和 nvar 分组计算每个组的平均值。
library(data.table)
# Recode nvar by reference: "Ecor"/"ACC" become "cat1", "SS" stays "SS",
# and every other label becomes "cat2".
dat[, nvar := fifelse(
  test = nvar %in% c("Ecor", "ACC"),
  yes = "cat1",
  no = fifelse(test = nvar == "SS", yes = "SS", no = "cat2")
)]
# Mean of value within each (bin, nvar) group.
dat[, .(value = mean(value)), by = .(bin, nvar)]
答案 1 :(得分:0)
我们可以使用 data.table 中的 fcase,
从而避免嵌套的条件判断
library(data.table)
# Mean of value per bin and per recoded nvar category. The recoding happens
# inside `by`, so no column of dat is assigned to.
dat[, .(value = mean(value)), by = .(
  bin,
  nvar = fcase(
    nvar %chin% c("Ecor", "ACC"), "cat1",
    nvar == "SS", "SS",
    default = "cat2"
  )
)]
-输出
# bin nvar value
# 1: a cat1 3.5
# 2: b cat1 7.0
# 3: c cat1 0.0
# 4: c cat1 1.0
# 5: d cat1 1.0
# 6: e cat1 4.0
# 7: a cat2 8.0
# 8: b cat2 4.0
# 9: c cat2 8.0
#10: c cat2 7.0
#11: d cat2 2.0
#12: e cat2 2.0
#13: a SS 4.0
#14: b SS 6.0
#15: c SS 7.0
#16: c SS 17.0
#17: d SS 1.0
#18: e SS 2.0