将select_helpers与dplyr::coalesce一起使用

时间:2018-04-29 15:54:26

标签: r dplyr

我有一个非常宽的数据帧(远大于此处为reprex提供的数据)。

使用下面提供的数据(赋值给my_wide_data),我想把dplyr::coalesce与dplyr的选择辅助函数(select helpers,例如dplyr::starts_with)结合起来使用。

# dput output assigned to my_wide_data
# Example data: 204 rows, three integer columns (myvar1, myvar2, myvar3);
# myvar1 and myvar2 contain some NA values, myvar3 has none.
my_wide_data <- structure(
  list(
    myvar1 = c(
      10L, 3L, 11L, 2L, 4L, 5L, 2L, 6L, 1L,
      4L, 12L, 9L, 12L, 2L, 3L, 1L, 2L, 8L, 1L, 2L, 3L, 3L, 8L, 11L,
      10L, 6L, 3L, 10L, 5L, 2L, 8L, 3L, 1L, 6L, 2L, 1L, 8L, 4L, 10L,
      3L, 1L, 4L, 2L, 12L, 3L, 2L, 5L, 1L, 3L, 5L, 3L, 2L, 12L, 3L,
      6L, 11L, 12L, 2L, 6L, 10L, 3L, 10L, 3L, 2L, 2L, 2L, 2L, 3L, 6L,
      3L, 6L, 10L, 1L, 3L, 3L, 6L, 2L, 3L, 3L, 3L, 2L, 3L, 2L, 10L,
      3L, 3L, 4L, 1L, 3L, 2L, 3L, 9L, 1L, 1L, NA, 5L, 1L, 8L, 3L, 10L,
      3L, 3L, 4L, 7L, 10L, 2L, 2L, 11L, 6L, 11L, 6L, 4L, 4L, 12L, 6L,
      6L, 1L, 2L, 11L, 2L, 2L, 11L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 9L,
      2L, 1L, 1L, 4L, 2L, 8L, 2L, 10L, 6L, 3L, 1L, 6L, 2L, 10L, 3L,
      5L, 6L, 3L, 4L, 10L, 9L, 3L, 4L, 3L, 2L, 3L, 9L, 3L, 3L, 1L,
      10L, 4L, 4L, 6L, 2L, 7L, 3L, 2L, 3L, 1L, 3L, 3L, 3L, 7L, 2L,
      2L, 6L, 2L, 4L, 3L, 3L, 4L, 2L, 4L, 2L, 5L, 5L, 3L, 6L, 5L, 4L,
      5L, 4L, 4L, 10L, 1L, 9L, 4L, 4L, 4L, 4L, 8L, 6L, 5L
    ),
    myvar2 = c(
      24L,
      24L, 27L, 8L, 9L, 15L, 1L, 27L, 3L, 23L, 28L, 10L, 24L, 5L, 14L,
      17L, 16L, 28L, 29L, 16L, 3L, 13L, 7L, 13L, 18L, 25L, 10L, 10L,
      15L, 27L, 21L, 17L, 25L, 25L, 15L, 25L, 21L, 13L, 9L, 28L, 1L,
      13L, 19L, 21L, 23L, 15L, NA, 29L, 12L, 25L, 1L, 5L, 12L, 7L,
      15L, 25L, 4L, 8L, 30L, 25L, 8L, NA, 6L, 16L, 14L, 7L, 20L, 26L,
      19L, 10L, 1L, 15L, 30L, 7L, 16L, 23L, 24L, 21L, 8L, 1L, 1L, 10L,
      26L, 28L, 5L, 7L, 21L, 10L, 13L, 26L, 14L, 5L, 22L, 18L, NA,
      NA, 9L, 20L, 17L, 23L, 3L, 13L, 7L, 5L, 6L, 9L, 8L, 15L, 9L,
      10L, 15L, 13L, NA, 30L, 22L, 14L, 9L, 16L, 6L, 13L, 19L, 15L,
      1L, 7L, 19L, 25L, 10L, NA, 8L, 25L, 5L, 2L, 16L, 8L, 19L, 18L,
      27L, 2L, NA, 16L, 29L, 4L, 7L, 27L, 24L, 5L, 6L, 17L, 16L, 13L,
      11L, NA, 12L, 9L, 8L, 1L, NA, 5L, 12L, 3L, 3L, 10L, 16L, 16L,
      5L, 24L, 10L, 17L, 23L, 19L, 12L, 12L, 18L, 6L, 1L, 3L, 15L,
      26L, 28L, 28L, 27L, 3L, 18L, 22L, 13L, 11L, 30L, 24L, 1L, 25L,
      21L, 7L, 14L, 16L, 9L, 3L, 28L, 11L, 17L, 11L, 25L, 23L, 7L,
      21L
    ),
    myvar3 = c(
      78L, 79L, 78L, 78L, 79L, 78L, 79L, 77L, 79L,
      79L, 76L, 78L, 78L, 79L, 79L, 79L, 79L, 78L, 79L, 79L, 79L, 79L,
      78L, 78L, 78L, 79L, 79L, 78L, 78L, 79L, 78L, 79L, 79L, 78L, 79L,
      79L, 78L, 78L, 78L, 79L, 79L, 79L, 79L, 78L, 79L, 79L, 73L, 79L,
      79L, 79L, 79L, 79L, 72L, 79L, 78L, 78L, 78L, 79L, 78L, 78L, 79L,
      78L, 79L, 79L, 79L, 79L, 79L, 78L, 78L, 79L, 78L, 78L, 79L, 79L,
      79L, 76L, 79L, 78L, 79L, 79L, 79L, 79L, 79L, 75L, 79L, 79L, 79L,
      79L, 79L, 79L, 79L, 78L, 79L, 79L, 77L, 78L, 79L, 78L, 79L, 78L,
      79L, 79L, 79L, 78L, 78L, 79L, 79L, 78L, 78L, 78L, 78L, 79L, 79L,
      78L, 78L, 76L, 79L, 76L, 77L, 79L, 79L, 78L, 79L, 79L, 79L, 79L,
      79L, 79L, 79L, 78L, 78L, 79L, 78L, 79L, 79L, 78L, 79L, 78L, 79L,
      79L, 79L, 79L, 79L, 78L, 79L, 79L, 77L, 79L, 79L, 78L, 78L, 79L,
      78L, 79L, 79L, 79L, 78L, 79L, 79L, 79L, 78L, 79L, 79L, 78L, 79L,
      78L, 79L, 79L, 78L, 79L, 79L, 79L, 79L, 79L, 79L, 79L, 78L, 79L,
      78L, 79L, 79L, 79L, 79L, 79L, 78L, 79L, 79L, 79L, 79L, 79L, 79L,
      79L, 78L, 79L, 78L, 79L, 78L, 79L, 79L, 79L, 79L, 76L, 78L, 79L
    )
  ),
  class = "data.frame",
  row.names = c(NA, -204L)
)

换句话说,与其逐个列出每一列:

# Take, per row, the first non-missing value among the three columns.
# The new column must be named with `=`: writing `<-` inside mutate() passes
# an unnamed expression, so the column would be named after the deparsed
# call instead of `coalesce_var`.
my_wide_data %>%
  mutate(coalesce_var = coalesce(myvar1, myvar2, myvar3))

我希望能够做类似下面这样的事情:

# Desired (illustrative) syntax only -- this does not work as written,
# because starts_with() only resolves column names inside a selecting
# function such as select(); it cannot be handed to coalesce() directly.
# `=` (not `<-`) is used so the intended column name is `coalesce_var`.
my_wide_data %>%
  mutate(coalesce_var = coalesce(starts_with("my")))

问题:是否可以用dplyr或tidyverse中的其他工具实现这样的写法?

1 个答案:

答案 0 :(得分:4)

以下做法可行,它利用了这样一点:可以用`!!!`把一个列表拼接(splice)为coalesce(...)的各个参数

# Two numeric vectors with complementary gaps, held together in one list.
vecs <- list(c(1, 2, NA, NA, 5), c(NA, NA, 3, 4, 5))

# `!!!` splices the list so each element is passed to coalesce() as its own
# argument.
coalesce(!!!vecs)

您可以在select()中使用辅助函数选出所需的列,再用`!!!`把得到的数据框(即由各列组成的列表)拼接为coalesce()的参数

# select() picks every column whose name starts with "my" from the piped-in
# data (`.`); `!!!` then splices those columns into coalesce() as separate
# arguments.
my_wide_data %>%
  mutate(
    coalesce_var = coalesce(
      !!!select(., starts_with("my"))
    )
  )

#     myvar1 myvar2 myvar3 coalesce_var
# 1       10     24     78           10
# 2        3     24     79            3
# 3       11     27     78           11
# 4        2      8     78            2
# 5        4      9     79            4
# etc

编辑:这是另一种写法——我更喜欢这一种

library(rlang)
library(tidyselect)
# vars_select() returns the column names matching starts_with("my"), syms()
# turns each name into a symbol, and `!!!` splices those symbols into
# coalesce() so every matching column is passed as its own argument.
my_wide_data %>%
  mutate(
    coalesce_var = coalesce(
      !!!syms(vars_select(names(.), starts_with("my")))
    )
  )