Question

我想在dplyr函数中创建一个简单的if / else条件。我看过一些有用的信息（例如How to parametrize function calls in dplyr 0.7?），但仍然遇到麻烦。

下面是一个玩具示例：不传入分组变量调用函数时，它可以正常工作；但一旦传入分组变量，函数就会失败。

# Example dataset: ten rows; A runs 1..5 twice, B and C are numeric codes.
test <- tibble(
  A = rep(1:5, times = 2),
  B = c(1, 2, 1, 2, 3, 3, 3, 3, 3, 3),
  C = c(1, 1, 1, 1, 2, 3, 4, 5, 4, 3)
)

# begin function, set default for group var to NULL.
# Summarises `df` into a `Percentages` column: each group's row count as a
# percentage of nrow(df). With `group`, rows are grouped by that column;
# without it, rows are grouped by the values of `column` itself (key column
# named `col_name`). `column` and `group` are unquoted column names that are
# captured with enquo().
prop_tab <- function(df, column, group = NULL) {

  col_name <- enquo(column)
  group_name <- enquo(group)

  # if group_by var is NOT null, then...
  # NOTE: is.null(group) evaluates `group` in the function's scope; with an
  # unquoted column name such as B this raises "object 'B' not found", which
  # is the failure shown in the second example call below.
  if(!is.null(group)) {
      temp <- df %>%
        select(!!col_name, !!group_name) %>% 
        group_by(!!group_name) %>% 
        summarise(Percentages = 100 * length(!!col_name) / nrow(df))

  } else {
  # if group_by var is null, then...
      temp <- df %>%
        select(!!col_name) %>% 
        group_by(col_name = !!col_name) %>% 
        summarise(Percentages = 100 * length(!!col_name) / nrow(df)) 

  }

  temp
}

test %>% prop_tab(column = C)  # works

test %>% prop_tab(column = A, group = B)  # fails
# Error in prop_tab(., column = A, group = B) : object 'B' not found

Answer 1

这里的问题是，当您提供未加引号的参数时，is.null()不知道如何处理它。代码会尝试对对象B求值以检查它是否为NULL，但B在该作用域中并不存在，因此报错。您可以改用missing()来检查调用者是否向函数提供了该参数，如下所示。也许还有更简洁的方法，但这种方法至少行得通，如下方的输出所示。

library(tidyverse)
# Example dataset: ten rows; A runs 1..5 twice, B and C are numeric codes.
test <- tibble(
  A = rep(1:5, times = 2),
  B = c(1, 2, 1, 2, 3, 3, 3, 3, 3, 3),
  C = c(1, 1, 1, 1, 2, 3, 4, 5, 4, 3)
)

# Percentage of the rows of `df` that fall into each group.
#
# Arguments:
#   df     - data frame / tibble to summarise.
#   column - unquoted column name.
#   group  - optional unquoted grouping column. It deliberately has no
#            default: whether it was supplied is detected with missing(),
#            which does not evaluate the argument.
#
# Returns the summarised table with a `Percentages` column. When `group` is
# omitted, rows are grouped by the values of `column` itself and the key
# column is named `col_name`.
prop_tab <- function(df, column, group) {

  value_quo <- enquo(column)
  by_quo <- enquo(group)

  # No grouping column supplied: tabulate the values of `column`.
  if (missing(group)) {
    return(
      df %>%
        select(!!value_quo) %>%
        group_by(col_name = !!value_quo) %>%
        summarise(Percentages = 100 * length(!!value_quo) / nrow(df))
    )
  }

  # Grouping column supplied: one row per value of `group`.
  df %>%
    select(!!value_quo, !!by_quo) %>%
    group_by(!!by_quo) %>%
    summarise(Percentages = 100 * length(!!value_quo) / nrow(df))
}

test %>% prop_tab(column = C)  # works
#> # A tibble: 5 x 2
#>   col_name Percentages
#>      <dbl>       <dbl>
#> 1        1          40
#> 2        2          10
#> 3        3          20
#> 4        4          20
#> 5        5          10

test %>% prop_tab(column = A, group = B)
#> # A tibble: 3 x 2
#>       B Percentages
#>   <dbl>       <dbl>
#> 1     1          20
#> 2     2          20
#> 3     3          60

由reprex package（v0.2.0）于2018-06-29创建。

Answer 2

您可以使用missing()代替is.null()，这样就不会对您的参数求值（对参数求值正是导致错误的原因）：

# Share of rows (in percent) per group of `df`.
#
# df     : data frame / tibble to summarise.
# column : unquoted column name.
# group  : optional unquoted grouping column. Its presence is tested with
#          missing(), which does not evaluate the argument, so a bare column
#          name is safe to pass.
#
# Returns the summarised table with a `Percentages` column; without `group`
# the rows are grouped by the values of `column` (key column `col_name`).
prop_tab <- function(df, column, group = NULL) {

  target <- enquo(column)
  grouping <- enquo(group)

  temp <- if (missing(group)) {
    # group argument omitted: tabulate the values of `column` itself
    df %>%
      select(!!target) %>%
      group_by(col_name = !!target) %>%
      summarise(Percentages = 100 * length(!!target) / nrow(df))
  } else {
    # group argument given: one row per value of the grouping column
    df %>%
      select(!!target, !!grouping) %>%
      group_by(!!grouping) %>%
      summarise(Percentages = 100 * length(!!target) / nrow(df))
  }

  temp
}

test %>% prop_tab(column = C) 
# example dataset
# # A tibble: 5 x 2
#   col_name Percentages
#      <dbl>       <dbl>
# 1        1          40
# 2        2          10
# 3        3          20
# 4        4          20
# 5        5          10

test %>% prop_tab(column = A, group = B)
# # A tibble: 3 x 2
#       B Percentages
#   <dbl>       <dbl>
# 1     1          20
# 2     2          20
# 3     3          60

您也可以使用length(substitute(group))代替!missing(group)作为判断条件。这种写法更健壮：即使有人显式地将group参数设为NULL，它也不会失败，而前一种写法在这种情况下会崩溃。

Answer 3

一种选择是检查“group_name”而不是“group”：

# Percentage of the rows of `df` that fall into each group.
#
# Arguments:
#   df     - data frame / tibble to summarise.
#   column - unquoted column name.
#   group  - optional unquoted grouping column; NULL (the default) means
#            "no grouping column".
#
# Returns the summarised table with a `Percentages` column giving each
# group's share of nrow(df). Without `group`, rows are grouped by the values
# of `column` itself and the key column is named `col_name`.
prop_tab <- function(df, column, group = NULL) {

  col_name <- enquo(column)
  group_name <- enquo(group)

  # Inspect the captured quosure instead of `group`, so an unquoted column
  # name is never evaluated outside the data frame. quo_is_null() is the
  # dedicated predicate for this; coercing a quosure with as.character() is
  # deprecated in rlang. rlang is the package behind enquo()/!! and is
  # installed with dplyr, so the qualified call adds no new dependency.
  if (!rlang::quo_is_null(group_name)) {
    # grouping column supplied: one row per value of `group`
    temp <- df %>%
      select(!!col_name, !!group_name) %>%
      group_by(!!group_name) %>%
      summarise(Percentages = 100 * length(!!col_name) / nrow(df))

  } else {
    # no grouping column: tabulate the values of `column` itself
    temp <- df %>%
      select(!!col_name) %>%
      group_by(col_name = !!col_name) %>%
      summarise(Percentages = 100 * length(!!col_name) / nrow(df))

  }

  temp
}

检查：

prop_tab(test, column = C, group = B)
# A tibble: 3 x 2
#      B Percentages
# <dbl>       <dbl>
#1     1          20
#2     2          20
#3     3          60  



prop_tab(test, column = C)
# A tibble: 5 x 2
#  col_name Percentages
#     <dbl>       <dbl>
#1        1          40
#2        2          10
#3        3          20
#4        4          20
#5        5          10

dplyr 0.7函数中的if / else条件

3 个答案: