如何相对于3组更改列的值

时间:2019-09-22 23:37:30

标签: r dataframe

我有此数据

     SAMPN MODE1 HHVEH PERNO PLANO loop
30    23     2     3     1    25    2
31    23     1     3     2     2    2
32    23     2     3     2     5    2
33    24     1     1     1     2    2
34    24     1     1     1     3    2
35    24     1     1     1     4    3
36    24     1     1     1     5    3
37    24     2     1     2     2    2
38    24     3     1     2     4    2
39    25     2     2     1     2    2
40    25     2     2     1     4    2
41    25     2     2     2     2    2
42    25     2     2     2     3    2
43    27     4     1     1     2    2
44    29     1     0     1     2    2
45    29     1     0     1     3    2

我想做两件事:

1- SAMPN是家庭的编号,PERNO是每个家庭中每个人的编号。PLANO是每个人的出行(trip),而loop是每个人的行程(tour)(每个行程由若干次出行组成)。MODE1是每次出行的方式。

如果某一行的MODE1 == 2,我希望具有相同SAMPN、PERNO和loop的所有行的MODE1也为2。

 dput(r[30:45,1:6])
structure(list(SAMPN = c("   23", "   23", "   23", "   24", 
"   24", "   24", "   24", "   24", "   24", "   25", "   25", 
"   25", "   25", "   27", "   29", "   29"), MODE1 = structure(c(2L, 
1L, 2L, 1L, 1L, 1L, 1L, 2L, 3L, 2L, 2L, 2L, 2L, 4L, 1L, 1L), .Label = c("1", 
"2", "3", "4"), class = "factor"), HHVEH = structure(c(4L, 4L, 
4L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 1L, 1L), .Label = c("0", 
"1", "2", "3", "4", "5", "6", "7", "8"), class = "factor"), PERNO = structure(c(1L, 
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L), .Label = c("1", 
"2", "3", "4", "5", "6", "7"), class = "factor"), PLANO = structure(c(20L, 
1L, 4L, 1L, 2L, 3L, 4L, 1L, 3L, 1L, 3L, 1L, 2L, 1L, 1L, 2L), .Label = c(" 2", 
" 3", " 4", " 5", " 6", " 7", " 8", " 9", "10", "11", "12", "13", 
"14", "15", "16", "17", "18", "20", "23", "25", "29"), class = "factor"), 
    loop = structure(c(2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1", "2", "3", "4", "5", 
    "6", "7", "8"), class = "factor")), row.names = 30:45, class = "data.frame")

输出:

     SAMPN MODE1 HHVEH PERNO PLANO loop
30    23     2     3     1    25    2
31    23     2     3     2     2    2
32    23     2     3     2     5    2
33    24     1     1     1     2    2
34    24     1     1     1     3    2
35    24     1     1     1     4    3
36    24     1     1     1     5    3
37    24     2     1     2     2    2
38    24     2     1     2     4    2
39    25     2     2     1     2    2
40    25     2     2     1     4    2
41    25     2     2     2     2    2
42    25     2     2     2     3    2
43    27     4     1     1     2    2
44    29     1     0     1     2    2
45    29     1     0     1     3    2

当SAMPN为23且PERNO = 2、loop = 2(第二行)时,由于同组的第三行MODE1为2,所以第二行的MODE1应从1改为2。对于第38行也是如此。

1 个答案:

答案 0 :(得分:2)

我们可以使用case_when。按“SAMPN”、“PERNO”、“loop”分组,检查组内“MODE1”中是否有任何(any)值为2,若有则返回2,否则返回原来的“MODE1”

library(dplyr)
# Within each (SAMPN, PERNO, loop) group, set MODE1 to 2 when any row of the
# group has MODE1 == 2; otherwise keep each row's own MODE1 value.
# The result is still grouped by SAMPN, PERNO, loop.
df1 %>%
    group_by(SAMPN, PERNO, loop) %>%
    # MODE1 is a factor: convert via as.character() so the level labels are
    # turned into integers. as.integer() alone would return the internal
    # level codes, which only match the labels when the levels are exactly
    # "1", "2", "3", ... in order.
    mutate(MODE1 = case_when(any(MODE1 == 2) ~ 2L,
                             TRUE ~ as.integer(as.character(MODE1))))
# A tibble: 16 x 6
# Groups:   SAMPN, PERNO, loop [9]
#   SAMPN   MODE1 HHVEH PERNO PLANO loop 
#   <chr>   <int> <fct> <fct> <fct> <fct>
# 1 "   23"     2 3     1     25    2    
# 2 "   23"     2 3     2     " 2"  2    
# 3 "   23"     2 3     2     " 5"  2    
# 4 "   24"     1 1     1     " 2"  2    
# 5 "   24"     1 1     1     " 3"  2    
# 6 "   24"     1 1     1     " 4"  3    
# 7 "   24"     1 1     1     " 5"  3    
# 8 "   24"     2 1     2     " 2"  2    
# 9 "   24"     2 1     2     " 4"  2    
#10 "   25"     2 2     1     " 2"  2    
#11 "   25"     2 2     1     " 4"  2    
#12 "   25"     2 2     2     " 2"  2    
#13 "   25"     2 2     2     " 3"  2    
#14 "   27"     4 1     1     " 2"  2    
#15 "   29"     1 0     1     " 2"  2    
#16 "   29"     1 0     1     " 3"  2    

或使用data.table

library(data.table)
# Row indices of every (SAMPN, PERNO, loop) group that contains at least one
# row with MODE1 == 2: .I[TRUE] keeps all of the group's row numbers, while
# .I[FALSE] contributes none. setDT() converts df1 to a data.table by
# reference.
i1 <- setDT(df1)[, .I[any(MODE1 == 2)], .(SAMPN, PERNO, loop)]$V1
# MODE1 is a factor, so assign the label "2" rather than the integer 2L:
# an integer assigned to a factor column is taken as a level code, which
# matches the label "2" only when "2" happens to be the second level.
# The trailing [] prints the updated table.
df1[i1, MODE1 := "2"][]