使用dplyr

时间:2019-11-20 20:15:55

标签: r dplyr

在下面的数据集中,对于每个id,我已标记(第m列的m_flag和第w列的f_flag)在m OR w列中的3之后第一个出现1或2。

我正在尝试:

1)如果m缺失但var1没有缺失,则将m中3之前的那一行的m_flag设置为1

然后,将m_flag中的前一个1转换为0

2)如果w缺失但var2没有缺失(例如第7行),则将w中3之前的那一行的f_flag设置为1

然后,将f_flag中的前一个1转换为0(例如第6行)

# Example input: 11 rows spread over four ids (3, 2, 3 and 3 rows each).
# `m_flag` and `f_flag` are the existing flags belonging to columns `m` and `w`.
df <- data.frame(
  id     = rep(c(1, 2, 3, 4), times = c(3, 2, 3, 3)),
  m      = c(2, NA, NA, 2, 3, 2, 2, 3, 2, 2, 3),
  w      = c(2, NA, 3, 2, NA, 2, NA, 3, 2, NA, 3),
  var1   = c(5, NA, NA, 6, 6, 7, 7, 7, 8, 8, 8),
  var2   = c(3, 3, 3, 4, NA, 5, 5, 5, 6, NA, 6),
  m_flag = c(1, 0, NA, 1, NA, 0, 1, NA, 0, 1, NA),
  f_flag = c(1, 0, NA, 1, NA, 1, 0, NA, 1, 0, NA)
)
        > df
       id  m  w var1 var2 m_flag f_flag
    1   1  2  2    5    3      1      1
    2   1 NA NA   NA    3      0      0
    3   1 NA  3   NA    3     NA     NA
    4   2  2  2    6    4      1      1
    5   2  3 NA    6   NA     NA     NA
    6   3  2  2    7    5      0      1
    7   3  2 NA    7    5      1      0
    8   3  3  3    7    5     NA     NA
    9   4  2  2    8    6      0      1
    10  4  2 NA    8   NA      1      0
    11  4  3  3    8    6     NA     NA

输出(注意:只有第7行中的 1 会从0变为1,而第6行中的 0 则由1变为0)

 # Expected result: same values as the input data except `f_flag` in rows 6
 # and 7 (id 3), which are 0 and 1 here.
 output <- data.frame(
   id     = rep(c(1, 2, 3, 4), times = c(3, 2, 3, 3)),
   m      = c(2, NA, NA, 2, 3, 2, 2, 3, 2, 2, 3),
   w      = c(2, NA, 3, 2, NA, 2, NA, 3, 2, NA, 3),
   var1   = c(5, NA, NA, 6, 6, 7, 7, 7, 8, 8, 8),
   var2   = c(3, 3, 3, 4, NA, 5, 5, 5, 6, NA, 6),
   m_flag = c(1, 0, NA, 1, NA, 0, 1, NA, 0, 1, NA),
   f_flag = c(1, 0, NA, 1, NA, 0, 1, NA, 1, 0, NA)
 )

> output
   id  m  w var1 var2 m_flag f_flag
1   1  2  2    5    3      1      1
2   1 NA NA   NA    3      0      0
3   1 NA  3   NA    3     NA     NA
4   2  2  2    6    4      1      1
5   2  3 NA    6   NA     NA     NA
6   3  2  2    7    5      0    **0**
7   3  2 NA    7    5      1    **1**
8   3  3  3    7    5     NA     NA
9   4  2  2    8    6      0      1
10  4  2 NA    8   NA      1      0
11  4  3  3    8    6     NA     NA

谢谢

1 个答案:

答案 0 :(得分:1)

首先,在步骤1中创建与条件相对应的列。我们将其称为meet_condition_f和meet_condition_m。然后,我们将使用lead()在下一行中查看条件的值。如果为true,则将相应的标志重置为0。然后,对于条件为true的行,将标志设置为1(这是步骤1的第二步)。

如果您需要按组进行操作,例如,在mutate()之前添加group_by(id)。不要忘了之后用ungroup()取消分组。

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
# Reproducible input: 11 rows spread over four ids (3, 2, 3 and 3 rows each).
# `m_flag` and `f_flag` hold the flags that the pipeline below adjusts.
df <- data.frame(
  id     = rep(c(1, 2, 3, 4), times = c(3, 2, 3, 3)),
  m      = c(2, NA, NA, 2, 3, 2, 2, 3, 2, 2, 3),
  w      = c(2, NA, 3, 2, NA, 2, NA, 3, 2, NA, 3),
  var1   = c(5, NA, NA, 6, 6, 7, 7, 7, 8, 8, 8),
  var2   = c(3, 3, 3, 4, NA, 5, 5, 5, 6, NA, 6),
  m_flag = c(1, 0, NA, 1, NA, 0, 1, NA, 0, 1, NA),
  f_flag = c(1, 0, NA, 1, NA, 1, 0, NA, 1, 0, NA)
)

# Adjust `m_flag` / `f_flag` per `id`.
# Grouping by `id` keeps `lead()` inside each id, so the last row of one id
# never inspects the first row of the next id.
df %>%
  group_by(id) %>%
  mutate(
    # Indicator columns for the condition: the column itself is missing, the
    # next row of the same id holds a 3, and the companion variable is present.
    # `lead(x) == 3` is NA when there is no next row or the next value is NA,
    # so an indicator can be NA; those rows keep their flag unchanged below.
    meet_condition_m = is.na(m) & lead(m) == 3 & !is.na(var1),
    meet_condition_f = is.na(w) & lead(w) == 3 & !is.na(var2),
    # First, perform step two: convert the previous 1 to 0, i.e. reset the flag
    # on the row directly above a row that meets the condition. The fourth
    # argument (`missing`) keeps the current flag when the test is NA.
    m_flag = if_else(lead(meet_condition_m) & m_flag == 1, 0, m_flag, m_flag),
    # Then execute the first step: set the flag on the row meeting the condition.
    m_flag = if_else(meet_condition_m, 1, m_flag, m_flag),
    # Repeat for f
    f_flag = if_else(lead(meet_condition_f) & f_flag == 1, 0, f_flag, f_flag),
    f_flag = if_else(meet_condition_f, 1, f_flag, f_flag)
  ) %>%
  ungroup() %>%
  # Drop intermediate columns.
  select(-meet_condition_m, -meet_condition_f) %>%
  # Grouping turns the result into a tibble; return a plain data.frame like
  # the input so it prints the same way as before.
  as.data.frame()
#>    id  m  w var1 var2 m_flag f_flag
#> 1   1  2  2    5    3      1      0
#> 2   1 NA NA   NA    3      0      1
#> 3   1 NA  3   NA    3     NA     NA
#> 4   2  2  2    6    4      1      1
#> 5   2  3 NA    6   NA     NA     NA
#> 6   3  2  2    7    5      0      0
#> 7   3  2 NA    7    5      1      1
#> 8   3  3  3    7    5     NA     NA
#> 9   4  2  2    8    6      0      1
#> 10  4  2 NA    8   NA      1      0
#> 11  4  3  3    8    6     NA     NA
Created on 2019-11-20 by the reprex package (v0.3.0)