我有如下数据:
SAMPN MODE1 HHVEH PERNO PLANO loop
30 23 2 3 1 25 2
31 23 1 3 2 2 2
32 23 2 3 2 5 2
33 24 1 1 1 2 2
34 24 1 1 1 3 2
35 24 1 1 1 4 3
36 24 1 1 1 5 3
37 24 2 1 2 2 2
38 24 3 1 2 4 2
39 25 2 2 1 2 2
40 25 2 2 1 4 2
41 25 2 2 2 2 2
42 25 2 2 2 3 2
43 27 4 1 1 2 2
44 29 1 0 1 2 2
45 29 1 0 1 3 2
我想做两件事:
1- SAMPN是家庭的编号,PERNO是每个家庭中每个人的编号。PLANO是每个人的出行(trip)编号,而loop是每个人的行程(tour)编号(每个行程包含若干次出行)。MODE1是每次出行的模式。
如果某一行的MODE1 == 2,我希望具有相同SAMPN、PERNO和loop的所有行的MODE1也为2。
# Reproducible dump of rows 30-45, columns 1-6 of the data frame `r`.
# Note the column types: SAMPN is a character vector whose values carry
# leading whitespace, while MODE1, HHVEH, PERNO, PLANO and loop are factors
# (so as.integer() on them yields level codes, not the printed labels).
dput(r[30:45,1:6])
structure(list(SAMPN = c(" 23", " 23", " 23", " 24",
" 24", " 24", " 24", " 24", " 24", " 25", " 25",
" 25", " 25", " 27", " 29", " 29"), MODE1 = structure(c(2L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 3L, 2L, 2L, 2L, 2L, 4L, 1L, 1L), .Label = c("1",
"2", "3", "4"), class = "factor"), HHVEH = structure(c(4L, 4L,
4L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 1L, 1L), .Label = c("0",
"1", "2", "3", "4", "5", "6", "7", "8"), class = "factor"), PERNO = structure(c(1L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5", "6", "7"), class = "factor"), PLANO = structure(c(20L,
1L, 4L, 1L, 2L, 3L, 4L, 1L, 3L, 1L, 3L, 1L, 2L, 1L, 1L, 2L), .Label = c(" 2",
" 3", " 4", " 5", " 6", " 7", " 8", " 9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "20", "23", "25", "29"), class = "factor"),
loop = structure(c(2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1", "2", "3", "4", "5",
"6", "7", "8"), class = "factor")), row.names = 30:45, class = "data.frame")
期望的输出:
SAMPN MODE1 HHVEH PERNO PLANO loop
30 23 2 3 1 25 2
31 23 2 3 2 2 2
32 23 2 3 2 5 2
33 24 1 1 1 2 2
34 24 1 1 1 3 2
35 24 1 1 1 4 3
36 24 1 1 1 5 3
37 24 2 1 2 2 2
38 24 2 1 2 4 2
39 25 2 2 1 2 2
40 25 2 2 1 4 2
41 25 2 2 2 2 2
42 25 2 2 2 3 2
43 27 4 1 1 2 2
44 29 1 0 1 2 2
45 29 1 0 1 3 2
当SAMPN为23且PERNO = 2、loop = 2(第二行)时,由于第三行的MODE1为2,第二行的MODE1应由1改为2。第38行也是如此。
答案 0 :(得分:2)
我们可以使用case_when。按“SAMPN”、“PERNO”和“loop”分组,检查组内“MODE1”中是否有任何(any)值为2;如果有则返回2,否则返回原来的“MODE1”
library(dplyr)

# Within each SAMPN / PERNO / loop group, set MODE1 to 2 when any row of the
# group has MODE1 == 2; otherwise keep the row's own MODE1 value (as integer).
#
# MODE1 is a factor in the posted dput, so it is converted via as.character()
# first: as.integer() applied directly to a factor returns the internal level
# codes, which equal the labels only while the levels happen to be
# "1", "2", "3", ... in that order.
df1 %>%
  group_by(SAMPN, PERNO, loop) %>%
  mutate(MODE1 = case_when(
    any(MODE1 == 2) ~ 2L,
    TRUE ~ as.integer(as.character(MODE1))
  )) %>%
  ungroup()  # drop the grouping so later verbs do not silently run per group
# A tibble: 16 x 6
# SAMPN MODE1 HHVEH PERNO PLANO loop
# <chr> <int> <fct> <fct> <fct> <fct>
# 1 " 23" 2 3 1 25 2
# 2 " 23" 2 3 2 " 2" 2
# 3 " 23" 2 3 2 " 5" 2
# 4 " 24" 1 1 1 " 2" 2
# 5 " 24" 1 1 1 " 3" 2
# 6 " 24" 1 1 1 " 4" 3
# 7 " 24" 1 1 1 " 5" 3
# 8 " 24" 2 1 2 " 2" 2
# 9 " 24" 2 1 2 " 4" 2
#10 " 25" 2 2 1 " 2" 2
#11 " 25" 2 2 1 " 4" 2
#12 " 25" 2 2 2 " 2" 2
#13 " 25" 2 2 2 " 3" 2
#14 " 27" 4 1 1 " 2" 2
#15 " 29" 1 0 1 " 2" 2
#16 " 29" 1 0 1 " 3" 2
或使用data.table
library(data.table)

# setDT() converts df1 to a data.table by reference (no copy is made).
setDT(df1)

# MODE1 is a factor in the posted dput. Convert it by label before assigning:
# writing the integer 2L into a factor column would be interpreted in terms of
# level codes rather than labels, which is only correct while the levels
# happen to be "1", "2", "3", ... in that order.
df1[, MODE1 := as.integer(as.character(MODE1))]

# Row indices (.I) of every group that contains at least one MODE1 == 2.
# Subsetting .I with a single TRUE keeps all rows of the group, FALSE keeps
# none; na.rm = TRUE stops a missing MODE1 from producing an NA index.
i1 <- df1[, .I[any(MODE1 == 2L, na.rm = TRUE)],
          by = .(SAMPN, PERNO, loop)]$V1

# Update those rows in place; the trailing [] prints the modified table.
df1[i1, MODE1 := 2L][]