Question

假设有一个数据框 df，其中包含一些缺失值，如下所示：

   ID col_A_1 col_A_2 col_B_1 col_B_2
1   1       1      NA      NA       a
2   1       2      NA       1       b
3   1       3       1       2       c
4   1       4       2       3       d
5   1      NA       3       4       e
6   2      NA       1       5       f
7   2      NA       2       6       g
8   2       1       3       7       h
9   2       2       4       8    <NA>
10  2       3       5      NA    <NA>

我想使用 tidyr 的 fill() 填充缺失值，但只填充列名中包含 "A" 的那些列中的缺失值。

目前我可以用下面的代码实现：

library(dplyr)
library(tidyr)

# Fill missing values only in the columns whose names contain "A", separately
# within each ID group: first upwards, then downwards for whatever is still NA.
df %>%
  group_by(ID) %>%
  fill(grep("A", names(.), value = TRUE), .direction = "up") %>%
  fill(grep("A", names(.), value = TRUE), .direction = "down") %>%
  ungroup()

      ID col_A_1 col_A_2 col_B_1 col_B_2
   <dbl>   <int>   <int>   <int> <chr>  
 1     1       1       1      NA a      
 2     1       2       1       1 b      
 3     1       3       1       2 c      
 4     1       4       2       3 d      
 5     1       4       3       4 e      
 6     2       1       1       5 f      
 7     2       1       2       6 g      
 8     2       1       3       7 h      
 9     2       2       4       8 <NA>   
10     2       3       5      NA <NA>

不过，我想知道在 tidyr 的 fill() 中是否还有其他选择变量/列的方式。

样本数据：

# Example data: two ID groups of five rows each, with NA values in the
# "A" and "B" columns.
df <- data.frame(
  ID = rep(c(1, 2), each = 5),
  col_A_1 = c(1:4, rep(NA, 3), 1:3),
  col_A_2 = c(rep(NA, 2), 1:3, 1:5),
  col_B_1 = c(NA, 1:8, NA),
  col_B_2 = c(letters[1:8], rep(NA, 2)),
  stringsAsFactors = FALSE  # keep col_B_2 as character on R < 4.0
)

Answer 1

fill() 支持 tidyselect 的选择辅助函数（select helpers），例如 matches()：

library(tidyverse)
# Choose the columns to fill with a tidyselect helper: matches("A") selects
# every column whose name matches the regular expression "A" (matches() is
# case-insensitive by default; no other column name here contains an "a").
# .direction = "updown" fills upwards first and then downwards within each
# ID group, so a single fill() call replaces the separate "up"/"down" calls.
df %>%
  group_by(ID) %>%
  fill(matches("A"), .direction = "updown") %>%
  ungroup()  # drop the grouping so later steps are not silently grouped
# A tibble: 10 x 5
#      ID col_A_1 col_A_2 col_B_1 col_B_2
#   <dbl>   <int>   <int>   <int> <chr>  
# 1     1       1       1      NA a      
# 2     1       2       1       1 b      
# 3     1       3       1       2 c      
# 4     1       4       2       3 d      
# 5     1       4       3       4 e      
# 6     2       1       1       5 f      
# 7     2       1       2       6 g      
# 8     2       1       3       7 h      
# 9     2       2       4       8 <NA>   
#10     2       3       5      NA <NA>

tidyr fill() 中的变量/列选择

1 个答案: