我有一个像这样的数据集:
> dput(data_melt)
structure(list(Compound = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Compd1", "Compound1"
), class = "factor"), Concentration = structure(c(5L, 1L, 2L,
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L,
3L, 4L, 5L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 5L, 1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 5L, 1L, 2L, 3L, 4L, 5L, 1L,
2L, 3L, 4L, 5L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 5L,
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 5L, 1L, 2L, 3L, 4L, 5L,
1L, 2L, 3L, 4L, 5L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L), .Label = c(".01uM",
".1uM", "1.0uM", "10uM", "DMSO"), class = "factor"), Co.Agonist = structure(c(1L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), .Label = c("High/High",
"High/Low", "Low/High"), class = "factor"), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), class = "factor", .Label = c("Tau1",
"Tau2", "Peak.Mean", "Area.Mean", "SS1.Mean")), value = c(1.38196479885153,
1.14405508500302, 0.988961970528585, 1.44849126088629, 0.492462666110073,
NA, 2.17712741416582, 1.67028746892543, 1.96489211952819, 1.2460774224718,
NA, 0.485501088636007, 0.580120526488655, 0.530538989313623,
0.884536240505712, 0.322958355856638, 0.740882962734369, 1.18088839355135,
1.48745421674211, 1.16792544841743, 1.11656132754921, 1.14457816659658,
0.0675070264176897, 0.176054869732887, 0.174862277854592, 0.200470189214318,
0.187717771153427, 0.181176140081454, 0.117339926372974, 0.0941816692818621,
0.156408537242293, 0.171156092362873, 0.0642141717879837, 0.107013341555486,
0.0892122245482354, 0.151976744172333, 0.198474636073771, 0.188703600586299,
0.10970902239241, 0.117358989261514, 0.100312892958432, 0.118208485589655,
0.154895187369863, 0.101035151359696, -3926.26508451201, -696.475731092535,
-4384.77847338655, -718.718487256701, -3164.8941685203, -818.006663108841,
-4658.25223372398, -826.496302684798, -2416.89272653148, -2558.96929067338,
-672.257745869921, -3996.85447223941, -706.5215296652, -4190.52281192937,
-726.870892539311, -2852.22943401345, -831.88857277573, -4580.4780146496,
-816.712564805672, -6189.10619924791, -2540.95473989213, -713.422629648631,
7712.37025286162, 1906.38208801373, 7583.22998649368, 1879.68741296455,
7424.47445663593, 2236.40541039894, 6178.69685860507, 2064.41869983299,
5953.06397562968, 6373.90332689516, 1813.36551434687, 4585.08608292281,
1524.44544360278, 4343.20955707026, 1547.28354007935, 4772.3012092321,
1889.5819203618, 4251.33850498831, 1687.78145119834, 5978.28926211454,
3879.07376129486, 1533.61842684178, -622.178041494169, -301.118488704851,
-618.132026278872, -302.614140229218, -821.035687044046, -383.60893819189,
-683.302506820162, -331.474546574133, -764.139865695781, -546.931098421476,
-300.006976301825, -622.178041494169, -301.118488704851, -618.132026278872,
-302.614140229218, -821.035687044046, -383.60893819189, -683.302506820162,
-331.474546574133, -764.139865695781, -546.931098421476, -300.006976301825
)), .Names = c("Compound", "Concentration", "Co.Agonist", "variable",
"value"), row.names = c(NA, -110L), class = c("data.table", "data.frame"
), .internal.selfref = <pointer: 0x0000000000120788>)
>
我的目标是按前面的各列(Compound、Concentration、Co.Agonist 和 variable)分组,对 value 列进行汇总。
我试过这个:
# Asker's attempt (kept as posted). Inside summarise(), `data_melt$value`
# pulls the entire column from the original table rather than the rows of the
# current group, so the grouping has no effect on the computed statistics.
DatAgg = data_melt %>% group_by(Concentration,Co.Agonist,variable)%>%
summarise(mean=mean(data_melt$value,na.rm=TRUE),sd=sd(data_melt$value))
但输出不会将data.frame折叠为唯一的组合,并且只返回所有变量的一个值。
我希望使用这个dplyr解决方案来获得group_by变量的每个唯一组合的mean和sd。
答案 0 :(得分:3)
语法应为
# Group by the three key columns, then compute one mean and one sd per group.
# Referring to the bare column name `value` (not `data_melt$value`) is what
# makes mean() and sd() operate on each group's rows only.
# na.rm=TRUE drops the NA entries present in `value` before aggregating.
data_melt %>%
group_by(Concentration,Co.Agonist,variable) %>%
summarise(Mean = mean(value, na.rm=TRUE),
Sd = sd(value, na.rm=TRUE))
在 OP 的代码中,虽然已经按变量分组,但 `mean(data_melt$value, na.rm=TRUE)` 取的是整个 `value` 列的均值,而不是每个分组内的均值。
因此,所有分组得到的都是同一个值。
由于 `data_melt` 是 data.table 对象,因此也可以使用 data.table 的语法:
# data.table equivalent of the grouped summary: the `j` expression builds the
# Mean and Sd columns, and `by` lists the grouping columns.
# `.()` is data.table's shorthand for list().
data_melt[, .(Mean = mean(value, na.rm=TRUE), Sd = sd(value, na.rm=TRUE)),
by = .(Concentration, Co.Agonist, variable)]