满足多个条件时,添加重复的行

时间:2019-07-31 02:20:37

标签: r dplyr bind magrittr

当数据框中的某一行同时满足两个条件时,我想为该行添加一个重复行。

原始数据帧如下:

ID_NO   SSN DOB STATUS  NEW_VALUE   OLD_VALUE   ADDRESS ZIP CITY    COUNTRY
123 7687    1/1/91  0   NA  NA  xyz 45  nyc usa
456 85723   2/1/91  -1  NA  NA  uii 34  fll usa
789 8783    3/1/93  2   NA  NA  oii 56  bos usa
987 18267   9/1/99  -4  NA  NA  qww 67  sfo usa
765 9238    10/1/00 3   NA  NA  jhk 87  lax usa

我想做的是:为 STATUS > 0 且 NEW_VALUE 为 NA 的每一行添加一个重复行,并将重复行中 OLD_VALUE 的值改为 "approved"。

我尝试使用 dplyr 中的 bind_rows 和 mutate,如下所示:

# Insert an "approved" copy directly after every row whose STATUS is positive
# and whose NEW_VALUE is missing.
# rownum records each row's original position; the copies get rownum + 0.5 so
# that sorting on rownum places each copy right behind its original.
df <- df %>%
  mutate(rownum = row_number()) %>%
  bind_rows(
    .,
    # Refer to the piped data's own columns rather than reaching back to the
    # outer `df` with `$`, so the condition always matches the rows filtered.
    filter(., is.na(NEW_VALUE) & STATUS > 0) %>%
      mutate(OLD_VALUE = "approved", rownum = rownum + 0.5)
  ) %>%
  arrange(rownum) %>%
  select(-rownum)

这是我从代码中获得的预期结果,但是我想知道这是否是正确的方法,是否还有其他替代方法?

ID_NO   SSN DOB STATUS  NEW_VALUE   OLD_VALUE   ADDRESS ZIP CITY    COUNTRY
123 7687    1/1/91  0   NA  NA  xyz 45  nyc usa
456 85723   2/1/91  -1  NA  NA  uii 34  fll usa
789 8783    3/1/93  2   NA  NA  oii 56  bos usa
789 8783    3/1/93  2   NA  approved    oii 56  bos usa
987 18267   9/1/99  -4  NA  NA  qww 67  sfo usa
765 9238    10/1/00 3   NA  NA  jhk 87  lax usa
765 9238    10/1/00 3   NA  approved    jhk 87  lax usa

3 个答案:

答案 0 :(得分:1)

这与您的尝试类似,但我会这样写:

# Parse DOB (day/month/two-digit year) into Date so rows can be sorted by it
df$DOB <- as.Date(df$DOB, format = "%d/%m/%y")

library(dplyr)
# Build the "approved" copies of the qualifying rows, append them below the
# untouched original rows, then sort the combined data by DOB.
df %>%
  filter(STATUS > 0, is.na(NEW_VALUE)) %>%
  mutate(OLD_VALUE = "approved") %>%
  bind_rows(df, .) %>%
  arrange(DOB)

#  ID_NO   SSN        DOB STATUS NEW_VALUE OLD_VALUE ADDRESS ZIP CITY COUNTRY
#1   123  7687 1991-01-01      0        NA      <NA>     xyz  45  nyc     usa
#2   456 85723 1991-01-02     -1        NA      <NA>     uii  34  fll     usa
#3   789  8783 1993-01-03      2        NA      <NA>     oii  56  bos     usa
#4   789  8783 1993-01-03      2        NA  approved     oii  56  bos     usa
#5   987 18267 1999-01-09     -4        NA      <NA>     qww  67  sfo     usa
#6   765  9238 2000-01-10      3        NA      <NA>     jhk  87  lax     usa
#7   765  9238 2000-01-10      3        NA  approved     jhk  87  lax     usa

在 base R 中可以写为

# Append an "approved" copy of every row with STATUS > 0 and a missing
# NEW_VALUE. subset() evaluates its condition inside df, so the columns are
# referenced by bare name.
df1 <- rbind(
  df,
  transform(subset(df, STATUS > 0 & is.na(NEW_VALUE)), OLD_VALUE = "approved")
)
# Order by DOB for display.
# NOTE(review): assumes DOB has already been converted to Date - confirm.
df1[order(df1$DOB), ]

答案 1 :(得分:1)

无需先 filter 再 bind_rows 也可以做到这一点:把逻辑条件加 1 作为每一行的重复次数传给 uncount,即可复制满足条件的行

library(tidyverse)
df %>%
  # Weight 2 duplicates a qualifying row, weight 1 keeps a row as is. The
  # !is.na(STATUS) guard stops a missing STATUS from producing an NA weight.
  # `.id` numbers the copies of each input row (1 = original, 2 = duplicate).
  uncount(
    1L + (!is.na(STATUS) & STATUS > 0 & is.na(NEW_VALUE)),
    .id = "copy"
  ) %>%
  # Mark only the added copy. Existing OLD_VALUE entries are preserved, and
  # the result does not depend on ID_NO being unique in the input.
  mutate(OLD_VALUE = if_else(copy == 2L, "approved", as.character(OLD_VALUE))) %>%
  select(-copy)
#  ID_NO   SSN     DOB STATUS NEW_VALUE OLD_VALUE ADDRESS ZIP CITY COUNTRY
#1   123  7687  1/1/91      0        NA      <NA>     xyz  45  nyc     usa
#2   456 85723  2/1/91     -1        NA      <NA>     uii  34  fll     usa
#3   789  8783  3/1/93      2        NA      <NA>     oii  56  bos     usa
#4   789  8783  3/1/93      2        NA  approved     oii  56  bos     usa
#5   987 18267  9/1/99     -4        NA      <NA>     qww  67  sfo     usa
#6   765  9238 10/1/00      3        NA      <NA>     jhk  87  lax     usa
#7   765  9238 10/1/00      3        NA  approved     jhk  87  lax     usa

base R中的类似选项

# Row indices of df, with each qualifying row (STATUS > 0 and missing
# NEW_VALUE) listed twice and every other row listed once.
i1 <- with(df, rep(seq_len(nrow(df)), 1L + (STATUS > 0 & is.na(NEW_VALUE))))
df1 <- df[i1, ]
# duplicated(i1) flags the second occurrence of a repeated row index, i.e.
# exactly the added copies, even if ID_NO values repeat in the input.
df1$OLD_VALUE[duplicated(i1)] <- "approved"

数据

# Example data: five records. NEW_VALUE and OLD_VALUE are all-NA logical
# columns; DOB is kept as character.
df <- data.frame(
  ID_NO = c(123L, 456L, 789L, 987L, 765L),
  SSN = c(7687L, 85723L, 8783L, 18267L, 9238L),
  DOB = c("1/1/91", "2/1/91", "3/1/93", "9/1/99", "10/1/00"),
  STATUS = c(0L, -1L, 2L, -4L, 3L),
  NEW_VALUE = rep(NA, 5L),
  OLD_VALUE = rep(NA, 5L),
  ADDRESS = c("xyz", "uii", "oii", "qww", "jhk"),
  ZIP = c(45L, 34L, 56L, 67L, 87L),
  CITY = c("nyc", "fll", "bos", "sfo", "lax"),
  COUNTRY = rep("usa", 5L),
  stringsAsFactors = FALSE
)

答案 2 :(得分:0)

我们还可以使用 group_map() 添加行。使用 @akrun 的数据:

library(tidyverse)
df %>%
  group_by(ID_NO) %>%
  # group_modify() replaces the original group_map() call here: in current
  # dplyr, group_map() returns a plain list, which cannot be passed to
  # ungroup(), whereas group_modify() returns a grouped data frame.
  group_modify(~ bind_rows(
    .x,
    # Pick the qualifying rows of this group (possibly none) and append them
    # as "approved" copies. Unlike a scalar `if`, this also works when a
    # group holds more than one row.
    .x %>%
      filter(STATUS > 0, is.na(NEW_VALUE)) %>%
      mutate(OLD_VALUE = "approved")
  )) %>%
  ungroup()
#> # A tibble: 7 x 10
#>   ID_NO   SSN DOB    STATUS NEW_VALUE OLD_VALUE ADDRESS   ZIP CITY  COUNTRY
#>   <int> <int> <chr>   <int> <lgl>     <chr>     <chr>   <int> <chr> <chr>  
#> 1   123  7687 1/1/91      0 NA        <NA>      xyz        45 nyc   usa    
#> 2   456 85723 2/1/91     -1 NA        <NA>      uii        34 fll   usa    
#> 3   765  9238 10/1/~      3 NA        <NA>      jhk        87 lax   usa    
#> 4   765  9238 10/1/~      3 NA        approved  jhk        87 lax   usa    
#> 5   789  8783 3/1/93      2 NA        <NA>      oii        56 bos   usa    
#> 6   789  8783 3/1/93      2 NA        approved  oii        56 bos   usa    
#> 7   987 18267 9/1/99     -4 NA        <NA>      qww        67 sfo   usa

在原理上也非常接近

df %>%
  # Collapse all non-ID columns into one list-column of per-ID data frames.
  # The nest(data = ...) / unnest(data) forms replace the deprecated
  # nest(-ID_NO) and bare unnest() calls.
  nest(data = -ID_NO) %>%
  # For every per-ID data frame, append an "approved" copy of each row with
  # STATUS > 0 and a missing NEW_VALUE. Filtering inside the function (rather
  # than using a scalar predicate) also handles IDs with several rows.
  mutate(data = map(data, ~ bind_rows(
    .x,
    .x %>%
      filter(STATUS > 0, is.na(NEW_VALUE)) %>%
      mutate(OLD_VALUE = "approved")
  ))) %>%
  unnest(data)

还有一个 base R 版本:我们先通过取子集的方式复制行,然后标记其中重复出现的行

# Row indices of df, listing each qualifying row (STATUS > 0 and missing
# NEW_VALUE) twice. seq_len() stays correct for a zero-row df, where
# seq(nrow(df)) would yield c(1, 0).
i <- with(df, rep(seq_len(nrow(df)), 1L + (STATUS > 0 & is.na(NEW_VALUE))))
df2 <- df[i, ]
# The second occurrence of a row index is the added copy. Keying on the
# index instead of ID_NO stays correct when ID_NO values repeat in df.
df2[duplicated(i), "OLD_VALUE"] <- "approved"
df2