我的目标是使用reshape2::acast()
创建一个二维数组,其中聚合函数是加权平均值。下面是一个简易的加权平均函数:
#' Weighted mean that ignores incomplete (value, weight) pairs
#'
#' @param x Numeric vector of values.
#' @param w Numeric vector of weights; defaults to equal weights. A shorter
#'   `w` (e.g. a single number) is recycled to the length of `x`.
#' @return A single number: `sum(x * w) / sum(w)` over the pairs in which
#'   neither the value nor the weight is missing. `NA_real_` when `x` is
#'   empty; `NaN` when no complete pair remains or the weights sum to zero.
wmean <- function(x, w = rep(1, length(x))) {
  if (length(x) == 0) {
    return(NA_real_)
  }
  # Align the weights with the values before filtering so that a scalar
  # weight still applies to every element.
  w <- rep_len(w, length(x))
  # Drop a pair as soon as either side is missing. Removing NAs from the
  # numerator and the denominator independently would keep the weight of a
  # missing value in the denominator and bias the mean.
  ok <- !is.na(x) & !is.na(w)
  x <- x[ok]
  w <- w[ok]
  sum(x * w) / sum(w)
}
以下是一些数据(100行,4列)。
# Example data frame (100 rows, 4 columns): cafloor2 and tafloor2 are the two
# grouping variables used in the casting formulas below, srh is the integer
# value being averaged, and p_wt2 holds the per-row weights.
Dat <- structure(list(cafloor2 = c(62, 62, 64, 60, 62, 64, 70, 72, 74,
76, 78, 60, 62, 64, 66, 68, 60, 60, 62, 64, 66, 60, 62, 62, 62,
64, 66, 68, 70, 60, 62, 64, 60, 62, 64, 66, 68, 70, 62, 62, 64,
68, 60, 62, 64, 66, 60, 62, 60, 62, 64, 60, 62, 60, 62, 64, 62,
64, 66, 68, 70, 72, 74, 76, 62, 60, 62, 64, 60, 62, 64, 66, 68,
60, 62, 64, 66, 68, 72, 74, 78, 80, 60, 60, 62, 64, 66, 60, 62,
60, 64, 60, 62, 64, 66, 68, 82, 84, 88, 60), tafloor2 = c(4,
2, 0, 4, 2, 0, 8, 6, 4, 2, 0, 8, 6, 4, 2, 0, 0, 6, 4, 4, 0, 2,
0, 0, 12, 10, 10, 6, 4, 6, 4, 0, 12, 10, 6, 4, 4, 0, 8, 6, 4,
2, 12, 10, 6, 6, 2, 0, 6, 4, 2, 4, 2, 4, 2, 0, 14, 12, 10, 8,
6, 4, 2, 0, 0, 4, 2, 0, 8, 6, 2, 2, 0, 8, 6, 4, 2, 0, 8, 6, 2,
0, 8, 6, 4, 4, 2, 2, 0, 2, 0, 8, 6, 4, 2, 0, 8, 6, 2, 2), srh = c(4L,
5L, 3L, 1L, 2L, 3L, 4L, 4L, 4L, 4L, 3L, 4L, 5L, 5L, 5L, 5L, 3L,
5L, 5L, 5L, 5L, 5L, 4L, 5L, 2L, 2L, 3L, 2L, 2L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 2L, 2L, 4L, 5L, 5L, 3L, 5L, 4L, 4L, 4L, 3L, 2L, 1L,
2L, 2L, 4L, 3L, 4L, 4L, 4L, 3L, 2L, 2L, 3L, 2L, 3L, 3L, 3L, 2L,
5L, 5L, 5L, 2L, 4L, 3L, 2L, 1L, 4L, 3L, 3L, 4L, 4L, 3L, 3L, 4L,
2L, 4L, 5L, 5L, 4L, 5L, 3L, 4L, 4L, 3L, 4L, 3L, 4L, 4L, 4L, 3L,
4L, 4L, 4L), p_wt2 = c(3065, 3121, 3390, 6122, 5747, 6488, 2045,
1880, 3083, 2326, 2288, 1563, 1789, 1676, 1536, 1586, 1517, 2231,
2420, 2066, 2108, 2015, 2031, 6239, 4718, 4460, 4735, 5183, 5359,
5084, 5400, 5280, 4439, 4714, 4939, 5359, 5143, 5298, 2298, 2496,
2650, 2719, 2025, 2341, 1990, 2015, 5114, 5554, 4304, 3824, 3962,
4718, 5253, 1475, 1556, 1657, 1475, 1556, 1657, 1765, 1736, 1845,
1760, 1751, 1291, 1749, 1845, 1770, 1556, 1657, 1765, 1736, 1747,
1324, 1453, 1711, 1898, 1830, 1563, 1518, 971, 996, 1326, 1194,
1293, 1447, 1347, 1536, 1628, 1749, 1845, 1260, 1456, 1165, 1122,
1156, 1342, 2063, 1643, 1491)), .Names = c("cafloor2", "tafloor2",
"srh", "p_wt2"), row.names = c(NA, 100L), class = "data.frame")
现在,我可以用以下方法得到由未加权均值构成的矩阵:
# Unweighted cell means of srh, cast with cafloor2 on the left-hand side of
# the formula and tafloor2 on the right-hand side; na.rm = TRUE is forwarded
# to mean().
acast(
  Dat,
  formula = cafloor2 ~ tafloor2,
  fun.aggregate = mean,
  value.var = "srh",
  na.rm = TRUE
)
但我希望能够像下面这样调用加权均值:
# Attempted weighted version. This call fails: w = "p_wt2" reaches wmean() as
# the literal character string "p_wt2", not as the p_wt2 column of Dat, so
# the multiplication x * w inside wmean() is given a non-numeric operand.
acast(Dat,cafloor2~tafloor2, value.var = "srh",
fun.aggregate = wmean, w = "p_wt2")
并根据需要从Dat
中获取权重。但这样调用会报错:
Error in x * w : non-numeric argument to binary operator
也许我今天脑子转得慢,但我卡住了,不知道该如何继续。当然也欢迎不使用acast()
的解决方案。谢谢!
答案 0 :(得分:2)
我似乎无法弄清楚如何使用acast
获得组的正确权重向量。鉴于此,我将通过首先按组计算摘要统计信息然后转换结果来解决此问题。以下是使用 dplyr 计算加权平均值的示例。
# Compute the weighted mean of srh for every (cafloor2, tafloor2) group with
# dplyr, where both the value column and the weight column are available,
# and only then cast the per-group summary with acast().
# library() stops with an error when dplyr is not installed, whereas
# require() only returns FALSE and lets the pipeline fail later.
library(dplyr)
Dat %>%
  group_by(cafloor2, tafloor2) %>%
  summarise(wsrh = wmean(srh, p_wt2)) %>%
  # Drop the grouping that summarise() leaves behind before reshaping.
  ungroup() %>%
  acast(cafloor2 ~ tafloor2, value.var = "wsrh")
您可以使用 tidyr 中的spread
代替acast
,但输出格式略有不同,您可能更喜欢acast
输出。
# Same per-group weighted mean, reshaped with tidyr instead of acast(): the
# tafloor2 values become columns and each row is one cafloor2 value.
# library() stops with an error when tidyr is not installed, whereas
# require() only returns FALSE and lets the pipeline fail later.
library(tidyr)
Dat %>%
  group_by(cafloor2, tafloor2) %>%
  summarise(wsrh = wmean(srh, p_wt2)) %>%
  # Drop the grouping that summarise() leaves behind before reshaping.
  ungroup() %>%
  # NOTE(review): spread() is superseded in current tidyr; pivot_wider() is
  # the maintained replacement if the column ordering is checked.
  spread(tafloor2, wsrh)