本问题是主题 "select group before certain observations in R" 的后续。
我在数据中新增了一个分组变量 add(取值为 x 或 y):
# Example data: 52 rows split into two `add` levels ("x" and "y", 26 rows each).
# Within each level `group` runs male (11 rows), female (7 rows), male (8 rows).
data <- data.frame(
  add = factor(rep(c("x", "y"), each = 26L), levels = c("x", "y")),
  x1 = c(
    # add == "x": male, female, male
    14L, 15L, 36L, 53L, 95L, 56L, 53L, 10L, 39L, 27L, 67L,
    25L, 19L, 49L, 53L, 64L, 61L, 12L,
    75L, 34L, 88L, 43L, 85L, 93L, 44L, 31L,
    # add == "y": male, female, male
    37L, 90L, 66L, 39L, 59L, 96L, 41L, 23L, 20L, 26L, 69L,
    28L, 35L, 96L, 87L, 82L, 70L, 68L,
    26L, 12L, 58L, 18L, 76L, 93L, 3L, 31L
  ),
  group = factor(
    rep(
      rep(c("male", "female", "male"), times = c(11L, 7L, 8L)),
      times = 2L
    ),
    levels = c("female", "male")
  )
)
如何按组(add)分别进行此分析?
AntoniosK 给出的解决方案非常好:
library(tidyverse)
library(data.table)
# Take the mean and first quartile of x1 in the first run of "male" rows, then
# overwrite x1 with that mean in the third run (male again) wherever x1 is
# above the quartile.
data %>%
  # Number the runs of consecutive identical `group` values: 1, 2, 3, ...
  mutate(group2 = rleid(group)) %>%
  group_by(group, group2) %>%
  # Only the (male, run 1) group has matching rows; every other group selects
  # an empty vector and therefore gets NaN / NA.
  mutate(
    MEAN = mean(x1[group == "male" & group2 == 1]),
    Q25 = quantile(x1[group == "male" & group2 == 1], 0.25)
  ) %>%
  ungroup() %>%
  # On the ungrouped table, unique(...) picks up the non-missing statistic.
  mutate(
    x1 = ifelse(
      group == "male" & group2 == 3 & x1 > unique(Q25[!is.na(Q25)]),
      unique(MEAN[!is.na(MEAN)]),
      x1
    )
  ) %>%
  select(-group2) %>%
  data.frame()
但如果我想对 x 组和 y 组分别执行这一操作,该怎么做?我是这样写的:
# Attempt to repeat the replacement separately for each level of `add`.
# NOTE(review): the statistics come out pooled across `add` instead of per
# level -- the printed result has the same MEAN (46.86364) and Q25 (26.25) on
# both the x rows and the y rows.
data %>% group_by(add) %>%
# This second group_by() names only group/group2 and is not called with
# `.add = TRUE`, so (per dplyr's documented default) it replaces the grouping
# by `add` rather than extending it.
group_by(group, group2 = rleid(group)) %>%
# Mean and first quartile of x1 over rows with group == "male" and group2 == 1;
# groups without such rows get NaN / NA.
mutate(MEAN = mean(x1[group=="male" & group2==1]),
Q25 = quantile(x1[group=="male" & group2==1], 0.25)) %>%
ungroup() %>%
# No grouping is active here, so unique(...) collects the non-missing Q25 /
# MEAN values from the whole table: one shared threshold and one shared
# replacement value are applied to every row with group == "male" and
# group2 == 3.
mutate(x1 = ifelse(group=="male" & group2==3 & x1 > unique(Q25[!is.na(Q25)]), unique(MEAN[!is.na(MEAN)]), x1)) %>%
# Redundant: the data are already ungrouped at this point.
ungroup() %>%
select(-group2) %>%
data.frame()
但得到的统计量不正确:
add x1 group MEAN Q25
1 x 14.00000 male 46.86364 26.25
2 x 15.00000 male 46.86364 26.25
3 x 36.00000 male 46.86364 26.25
4 x 53.00000 male 46.86364 26.25
5 x 95.00000 male 46.86364 26.25
6 x 56.00000 male 46.86364 26.25
7 x 53.00000 male 46.86364 26.25
8 x 10.00000 male 46.86364 26.25
9 x 39.00000 male 46.86364 26.25
10 x 27.00000 male 46.86364 26.25
11 x 67.00000 male 46.86364 26.25
12 x 25.00000 female NaN NA
13 x 19.00000 female NaN NA
14 x 49.00000 female NaN NA
15 x 53.00000 female NaN NA
16 x 64.00000 female NaN NA
17 x 61.00000 female NaN NA
18 x 12.00000 female NaN NA
19 x 46.86364 male NaN NA
20 x 46.86364 male NaN NA
21 x 46.86364 male NaN NA
22 x 46.86364 male NaN NA
23 x 46.86364 male NaN NA
24 x 46.86364 male NaN NA
25 x 46.86364 male NaN NA
26 x 46.86364 male NaN NA
27 y 37.00000 male 46.86364 26.25
28 y 90.00000 male 46.86364 26.25
29 y 66.00000 male 46.86364 26.25
30 y 39.00000 male 46.86364 26.25
31 y 59.00000 male 46.86364 26.25
32 y 96.00000 male 46.86364 26.25
33 y 41.00000 male 46.86364 26.25
34 y 23.00000 male 46.86364 26.25
35 y 20.00000 male 46.86364 26.25
36 y 26.00000 male 46.86364 26.25
37 y 69.00000 male 46.86364 26.25
38 y 28.00000 female NaN NA
39 y 35.00000 female NaN NA
40 y 96.00000 female NaN NA
41 y 87.00000 female NaN NA
42 y 82.00000 female NaN NA
43 y 70.00000 female NaN NA
44 y 68.00000 female NaN NA
45 y 26.00000 male NaN NA
46 y 12.00000 male NaN NA
47 y 46.86364 male NaN NA
48 y 18.00000 male NaN NA
49 y 46.86364 male NaN NA
50 y 46.86364 male NaN NA
51 y 3.00000 male NaN NA
52 y 46.86364 male NaN NA
对于 x 组,女性之前的男性的平均值应为 42;
对于 y 组,女性之前的男性的平均值应为 51。
答案 0 :(得分:2)
这应该有效:
# Same replacement as in the question, but carried out separately for every
# level of `add`.
data %>%
  # Number the runs of consecutive `group` values within each `add` level.
  group_by(add) %>%
  mutate(group2 = rleid(group)) %>%
  # Statistics per (add, group, run); only the first male run of each `add`
  # level has matching rows, all other groups get NaN / NA.
  group_by(add, group, group2) %>%
  mutate(
    MEAN = mean(x1[group == "male" & group2 == 1]),
    Q25 = quantile(x1[group == "male" & group2 == 1], 0.25)
  ) %>%
  # Back to one group per `add` level so that unique(...) only sees the
  # statistics belonging to that level when x1 is updated.
  group_by(add) %>%
  mutate(
    x1 = ifelse(
      group == "male" & group2 == 3 & x1 > unique(Q25[!is.na(Q25)]),
      unique(MEAN[!is.na(MEAN)]),
      x1
    )
  ) %>%
  ungroup() %>%
  select(-group2) %>%
  data.frame()
答案 1 :(得分:1)
由于被接受的答案已经使用了 data.table 包中的 rleid() 函数,
我建议也可以直接利用 data.table 的
"按组引用更新"(update by reference)功能:
library(data.table)
# Convert `data` to a data.table in place; every step below updates it by
# reference.
setDT(data)
# Temporary run id of consecutive `group` values, numbered within each `add`.
data[, rleid := rleid(group), by = add]
# Mean and first quartile of x1 over the first run of each `add` level; rows
# outside that run keep NA in both new columns.
data[rleid == 1L,
     c("mean", "Q25") := list(mean(x1), quantile(x1, 0.25)),
     by = add]
# Drop the helper column and print the updated table.
data[, rleid := NULL]
data[]
add x1 group mean Q25 1: x 14 male 42.27273 21.0 2: x 15 male 42.27273 21.0 3: x 36 male 42.27273 21.0 4: x 53 male 42.27273 21.0 5: x 95 male 42.27273 21.0 6: x 56 male 42.27273 21.0 7: x 53 male 42.27273 21.0 8: x 10 male 42.27273 21.0 9: x 39 male 42.27273 21.0 10: x 27 male 42.27273 21.0 11: x 67 male 42.27273 21.0 12: x 25 female NA NA 13: x 19 female NA NA 14: x 49 female NA NA 15: x 53 female NA NA 16: x 64 female NA NA 17: x 61 female NA NA 18: x 12 female NA NA 19: x 75 male NA NA 20: x 34 male NA NA 21: x 88 male NA NA 22: x 43 male NA NA 23: x 85 male NA NA 24: x 93 male NA NA 25: x 44 male NA NA 26: x 31 male NA NA 27: y 37 male 51.45455 31.5 28: y 90 male 51.45455 31.5 29: y 66 male 51.45455 31.5 30: y 39 male 51.45455 31.5 31: y 59 male 51.45455 31.5 32: y 96 male 51.45455 31.5 33: y 41 male 51.45455 31.5 34: y 23 male 51.45455 31.5 35: y 20 male 51.45455 31.5 36: y 26 male 51.45455 31.5 37: y 69 male 51.45455 31.5 38: y 28 female NA NA 39: y 35 female NA NA 40: y 96 female NA NA 41: y 87 female NA NA 42: y 82 female NA NA 43: y 70 female NA NA 44: y 68 female NA NA 45: y 26 male NA NA 46: y 12 male NA NA 47: y 58 male NA NA 48: y 18 male NA NA 49: y 76 male NA NA 50: y 93 male NA NA 51: y 3 male NA NA 52: y 31 male NA NA add x1 group mean Q25