根据Programming with dplyr中有关捕获多个参数的部分,我尝试指定
多个变量在dplyr::group_by
中进行分组
不依赖...
,而是使用显式列表参数group_vars
无需引用arg group_vars
示例数据
# Example data: two character grouping columns (a, b) and a numeric column (c).
df <- tibble::tibble(
  a = c("A", "A", "A", "B", "B"),
  b = c("a", "a", "b", "a", "b"),
  c = c(10, 20, 1000, 5, 1)
)
基于Programming with dplyr中的...
的方法
# Approach 1 -----
# Mean of column `c` per group; the grouping columns are passed unquoted
# through `...`.
my_summarise <- function(df, ...) {
  # Capture the dots as quosures so they can be spliced into group_by().
  grouping <- dplyr::enquos(...)
  grouped <- dplyr::group_by(df, !!!grouping)
  dplyr::summarise(grouped, x = mean(c))
}
my_summarise(df, a, b)
#> # A tibble: 4 x 3
#> # Groups: a [2]
#> a b x
#> <chr> <chr> <dbl>
#> 1 A a 15
#> 2 A b 1000
#> 3 B a 5
#> 4 B b 1
基于带有引号的元素的列表参数的方法:
# Approach 2 -----
# Mean of column `c` per group; the grouping columns are given as a character
# vector of column names (default: "a" and "b").
my_summarise_2 <- function(df, group_vars = c("a", "b")) {
  # Turn the column names into symbols so they can be spliced into group_by().
  group_syms <- dplyr::syms(group_vars)
  grouped <- dplyr::group_by(df, !!!group_syms)
  dplyr::summarise(grouped, x = mean(c))
}
my_summarise_2(df)
#> # A tibble: 4 x 3
#> # Groups: a [2]
#> a b x
#> <chr> <chr> <dbl>
#> 1 A a 15
#> 2 A b 1000
#> 3 B a 5
#> 4 B b 1
my_summarise_2(df, group_vars = "a")
#> # A tibble: 2 x 2
#> a x
#> <chr> <dbl>
#> 1 A 343.
#> 2 B 3
我找不到一种方法可以让我提供未加引号的列名:
# Approach 3 -----
# Attempted variant: accept unquoted column names through one list-like
# argument instead of `...`.
# This version does NOT work: calling my_summarise_3(df) raises the
# "Column `list(a, b)` must be length 5 ..." error shown after the call.
my_summarise_3 <- function(df, group_vars = list(a, b)) {
# enquos() is applied to the single argument `group_vars`, not to `...`.
group_vars <- dplyr::enquos(group_vars)
# The error message names a column `list(a, b)`: the whole expression reaches
# group_by() as one grouping term rather than as the two columns a and b.
df %>%
dplyr::group_by(!!!group_vars) %>%
dplyr::summarise(x = mean(c))
}
my_summarise_3(df)
#> Error: Column `list(a, b)` must be length 5 (the number of rows) or one, not 2
我想关键的是要获得与列表相同的列表结构
致电group_vars <- dplyr::enquos(...)
后一个:
<list_of<quosure>>
[[1]]
<quosure>
expr: ^a
env: global
[[2]]
<quosure>
expr: ^b
env: global
我尝试用group_vars %>% purrr::map(dplyr::enquo)
解决它,但是R当然抱怨a
和b
,因为它们需要评估。
答案 0 :(得分:1)
主要问题是list(a, b)
不会捕获未求值的表达式a
和b
,而是求值这些表达式并创建一个包含结果的两元素列表。您基本上有两个选择:
解决方案一::使用rlang::exprs()
捕获实际表达式。由于表达式已经被求值,因此您不再需要在函数内部使用enquos
,它就变成了
# Mean of column `c` per group; the grouping columns are supplied as already
# captured expressions.
#
# df         : data frame containing a column `c`.
# group_vars : a list of expressions such as rlang::exprs(a, b), or a single
#              captured expression such as quote(a).
# Returns the summarised data with the group means in column `x`.
my_summarise_3 <- function(df, group_vars = rlang::exprs(a, b)) {
  # A lone symbol or call (e.g. quote(a)) is wrapped in a list first:
  # splicing a bare language object with `!!!` is deprecated since rlang 0.4.0,
  # whereas splicing a list of expressions is always supported.
  if (is.symbol(group_vars) || is.call(group_vars)) {
    group_vars <- list(group_vars)
  }
  df %>%
    dplyr::group_by(!!!group_vars) %>%
    dplyr::summarise(x = mean(c))
}
my_summarise_3(df)
# # A tibble: 4 x 3
# # Groups: a [2]
# a b x
# <chr> <chr> <dbl>
# 1 A a 15
# 2 A b 1000
# 3 B a 5
# 4 B b 1
此界面的缺点是用户现在负责引述(即捕获其表达式)参数:
# Note that it can be done using quote() from base R
my_summarise_3(df, group_vars=quote(a))
# # A tibble: 2 x 2
# a x
# <chr> <dbl>
# 1 A 343.
# 2 B 3
解决方案二:完整捕获未评估的表达式list(a,b)
并手动解析。
## Helper function to recursively construct an abstract syntax tree.
## The expression is converted to a list; every element that is itself a call
## is expanded the same way, while symbols and constants stay as leaves.
## purrr is referenced explicitly so the helper works without attaching it.
getAST <- function(ee) {
  purrr::map_if(as.list(ee), is.call, getAST)
}
# Mean of column `c` per group; the grouping columns are given unquoted in a
# single argument.
#
# df         : data frame containing a column `c`.
# group_vars : a bare column name (b) or several names wrapped in a call such
#              as list(a, b) or c(a, b).
# Returns the summarised data with the group means in column `x`.
my_summarise_3 <- function(df, group_vars = list(a, b)) {
  ## Capture the argument expression without evaluating it
  captured <- rlang::enexpr(group_vars)
  ## all.vars() walks the expression and returns variable names only, so the
  ## head of a wrapper call (`list`, `c`, ...) is never mistaken for a data
  ## column of the same name -- e.g. c(a, b) must not group by column `c`.
  candidates <- all.vars(captured)
  ## Keep the names that are columns of the data and turn them into symbols
  gvars <- rlang::syms(intersect(candidates, names(df)))
  df %>%
    dplyr::group_by(!!!gvars) %>%
    dplyr::summarise(x = mean(c))
}
my_summarise_3(df, list(a,b))
# # A tibble: 4 x 3
# # Groups: a [2]
# a b x
# <chr> <chr> <dbl>
# 1 A a 15
# 2 A b 1000
# 3 B a 5
# 4 B b 1
my_summarise_3(df, b)
# # A tibble: 2 x 2
# b x
# <chr> <dbl>
# 1 a 11.7
# 2 b 500.
答案 1 :(得分:0)
我认为您只是想重新发明vars()
:
library(magrittr)
library(dplyr,warn.conflicts = FALSE)
#> Warning: package 'dplyr' was built under R version 3.6.1
# Example data: grouping columns a and b (character) plus the numeric column c.
df <- tibble::tibble(
  a = c("A", "A", "A", "B", "B"),
  b = c("a", "a", "b", "a", "b"),
  c = c(10, 20, 1000, 5, 1)
)
# Mean of column `c` per group; `group_vars` is handed unchanged to
# group_by_at(). The calls that follow pass a character vector
# (c("a", "b")) and a vars() selection (vars(a, b)).
my_summarise <- function(data, group_vars) {
  grouped <- group_by_at(data, group_vars)
  summarise(grouped, x = mean(c))
}
my_summarise(df, c("a","b"))
#> # A tibble: 4 x 3
#> # Groups: a [2]
#> a b x
#> <chr> <chr> <dbl>
#> 1 A a 15
#> 2 A b 1000
#> 3 B a 5
#> 4 B b 1
my_summarise(df, vars(a, b))
#> # A tibble: 4 x 3
#> # Groups: a [2]
#> a b x
#> <chr> <chr> <dbl>
#> 1 A a 15
#> 2 A b 1000
#> 3 B a 5
#> 4 B b 1
由reprex package(v0.3.0)于2019-07-26创建
如果您真的想要这个,这里是@Artem解决方案的一个变体(但是为什么呢?):
# Mean of column `c` per group; the grouping columns are given unquoted,
# either wrapped in a call such as list(a, b) or as a single bare name.
#
# df         : data frame containing a column `c`.
# group_vars : unquoted expression; for a call, its arguments become the
#              grouping terms, otherwise the expression itself is used.
# Returns the summarised data with the group means in column `x`.
my_summarise <- function(df, group_vars) {
  captured <- rlang::enexpr(group_vars)
  # For list(a, b) drop the call head and keep the arguments. A bare symbol
  # cannot be subset with [-1], so it is used as the only grouping term.
  quoted_group_vars <- if (is.call(captured)) {
    as.list(captured)[-1]
  } else {
    list(captured)
  }
  df %>%
    dplyr::group_by(!!!quoted_group_vars) %>%
    dplyr::summarise(x = mean(c))
}
my_summarise(df, list(a, b))
#> # A tibble: 4 x 3
#> # Groups: a [2]
#> a b x
#> <chr> <chr> <dbl>
#> 1 A a 15
#> 2 A b 1000
#> 3 B a 5
#> 4 B b 1