我有一个大型数据框,其中包括impoT
和nlc
作为密钥(一起忽略t
),以及其他每个都包含数字的列。我想为每个impoT
和nlc
对找到所有其他列的平均值,或者基本上是rowMean。最后给出了我的数据的一个子集,其中只包含一个nlc。我尝试的最后一件事是:
avg <- data.frame(a %>% group_by(impoT, nlc) %>% select(-c(1:3)) %>% mutate(r= rowMeans(.) ))
stds = (a %>% group_by(impoT, nlc) %>% select(-c(1:3)) %>% apply( 1, sd)) #wrong
dput(a)
structure(list(impoT = 1:18, nlc = c(669L, 669L, 669L, 669L,
669L, 669L, 669L, 669L, 669L, 669L, 669L, 669L, 669L, 669L, 669L,
669L, 669L, 669L), t = c(102L, 118L, 134L, 150L, 166L, 182L,
198L, 214L, 230L, 246L, 262L, 278L, 294L, 310L, 326L, 342L, 358L,
374L), X11950 = c(6, 14, 40, 53, 59, 70, 118, 119, 111, 114,
103, 220, 278, 94, 28, 13, 5, 8), X11951 = c(4, 18, 41, 64, 78,
87, 140, 112, 113, 129, 112, 245, 322, 102, 52, 20, 15, 7), X11952 = c(8,
13, 30, 42, 52, 86, 126, 118, 52, 87, 116, 251, 262, 101, 35,
21, 15, 21), X11955 = c(9, 11, 47, 38, 39, 70, 95, 82, 80, 77,
77, 142, 192, 78, 13, 13, 5, 0), X11956 = c(14, 13, 44, 65, 65,
72, 125, 138, 117, 111, 104, 175, 282, 93, 28, 14, 8, 4), X11957 = c(10,
7, 45, 42, 50, 83, 123, 102, 104, 82, 102, 234, 265, 101, 23,
13, 7, 6), X11958 = c(10, 13, 42, 60, 68, 69, 106, 125, 104,
103, 112, 233, 310, 128, 50, 22, 10, 5), X11959 = c(7, 11, 32,
45, 63, 74, 119, 87, 121, 108, 104, 229, 266, 111, 46, 26, 22,
11), X11962 = c(8, 12, 38, 35, 49, 58, 96, 66, 73, 109, 82, 161,
192, 75, 22, 4, 2, 3), X11963 = c(8, 9, 39, 40, 56, 50, 142,
98, 102, 78, 79, 220, 229, 87, 25, 5, 7, 2), X11964 = c(10, 9,
42, 60, 53, 52, 105, 114, 96, 94, 95, 180, 268, 114, 23, 10,
7, 10), X11965 = c(9, 9, 41, 40, 61, 81, 150, 102, 102, 121,
125, 222, 347, 116, 37, 18, 3, 4), X11966 = c(10, 9, 34, 43,
49, 73, 112, 123, 102, 92, 107, 207, 239, 115, 60, 18, 15, 5),
X11969 = c(8, 9, 31, 34, 41, 51, 93, 92, 68, 103, 76, 166,
182, 63, 24, 14, 6, 4), X11970 = c(7, 12, 33, 48, 56, 59,
102, 88, 99, 86, 103, 194, 233, 90, 25, 13, 7, 3), X11971 = c(9,
16, 37, 60, 78, 62, 114, 106, 129, 107, 91, 212, 272, 88,
31, 10, 3, 3), X12088 = c(6, 11, 41, 44, 56, 70, 106, 97,
64, 73, 75, 161, 186, 76, 17, 8, 2, 2), X12089 = c(0, 11,
53, 59, 62, 64, 114, 109, 109, 100, 66, 222, 241, 88, 19,
8, 8, 3), X12090 = c(4, 12, 57, 52, 65, 73, 132, 109, 120,
101, 104, 227, 238, 99, 17, 8, 10, 8), X12091 = c(4, 16,
54, 167, 74, 62, 111, 95, 120, 102, 92, 227, 317, 106, 44,
16, 10, 4), X12092 = c(9, 10, 50, 55, 63, 64, 130, 103, 98,
116, 83, 249, 279, 88, 35, 36, 22, 15), X12095 = c(5, 15,
39, 44, 53, 58, 95, 92, 67, 63, 69, 163, 182, 69, 20, 8,
4, 2), X12096 = c(3, 14, 49, 53, 71, 70, 107, 130, 90, 89,
101, 214, 253, 100, 30, 10, 3, 3), X12097 = c(2, 16, 53,
61, 82, 83, 123, 124, 125, 98, 89, 220, 274, 107, 20, 17,
7, 5), X12098 = c(6, 17, 56, 59, 51, 77, 102, 115, 93, 98,
83, 221, 288, 97, 36, 16, 9, 10), X12099 = c(2, 16, 39, 49,
60, 84, 112, 91, 102, 103, 108, 246, 261, 131, 49, 24, 18,
14), X12102 = c(4, 12, 29, 47, 64, 69, 104, 111, 92, 72,
105, 174, 179, 64, 16, 10, 2, 1)), .Names = c("impoT", "nlc",
"t", "X11950", "X11951", "X11952", "X11955", "X11956", "X11957",
"X11958", "X11959", "X11962", "X11963", "X11964", "X11965", "X11966",
"X11969", "X11970", "X11971", "X12088", "X12089", "X12090", "X12091",
"X12092", "X12095", "X12096", "X12097", "X12098", "X12099", "X12102"
), row.names = c(NA, -18L), class = "data.frame")
答案 0 :(得分:3)
如果你将手段分成两个步骤,这是最简单的,因为你实际上采取了不规则群体的平均值:首先是每一行,每一组是第二组。这意味着你采取了手段的方法,但是如果每个行均值具有相同数量的数字,它们应该是那种方式,尽管你应该考虑分组意味着可能是不同行数的手段。
您还需要嵌套select
,这样您就不会丢失分组变量,并使用summarise
来折叠组。总而言之,
a %>% mutate(r = rowMeans(select(a, -c(1:3)))) %>%
group_by(impoT, nlc) %>% summarise(r = mean(r))
产生
Source: local data frame [18 x 3]
Groups: impoT [?]
impoT nlc r
(int) (int) (dbl)
1 1 669 6.740741
2 2 669 12.407407
3 3 669 42.074074
4 4 669 54.037037
5 5 669 59.925926
6 6 669 69.296296
7 7 669 114.888889
8 8 669 105.481481
9 9 669 98.259259
10 10 669 96.888889
11 11 669 94.925926
12 12 669 207.962963
13 13 669 253.222222
14 14 669 95.592593
15 15 669 30.555556
16 16 669 14.629630
17 17 669 8.592593
18 18 669 6.037037