Question

我正在尝试为R编写用于分类变量的单变量分析的泛型函数。我可以将变量传递给dplyr，但不适用于ggplot代码。

这是我的代码-

 univariate_catogrical <- function(dataset,variable){
  variable <- enquo(variable)

  percentage <- dataset %>%
    select(!!variable) %>%
    group_by(!!variable) %>%
    summarise(n = n()) %>%
    mutate(percantage = (n / sum(n)) * 100)
  print(percentage)

  dataset %>%
    count(!!variable) %>%
    ggplot(mapping = aes_(x = rlang::quo_expr(!!variable), 
                          y = n, fill = rlang::quo_expr(!!variable))) +
    geom_bar(stat = 'identity',
             colour = 'white') +
    labs(x = "Reason.for.absence" , y = "count") + 
    ggtitle(" Count of Reason for absence") +
    theme(legend.position = "bottom") -> p
  plot(p)

}

执行上述功能时，我得到

> univariate_catogrical(employee_data_Imputed,Reason.for.absence)
# A tibble: 28 x 3
   Reason.for.absence     n percantage
   <fct>              <int>      <dbl>
 1 1                     16      2.23 
 2 2                      1      0.139
 3 3                      1      0.139
 4 4                      2      0.279
 5 5                      3      0.418
 6 6                      7      0.975
 7 7                     15      2.09 
 8 8                      6      0.836
 9 9                      4      0.557
10 10                    23      3.20 
# ... with 18 more rows
 Hide Traceback

 Rerun with Debug
 Error in grouped_df_impl(data, unname(vars), drop) : 
  Column `variable` is unknown

请问有人建议如何解决它。我正在使用ase_函数传递参数。

请找到可复制的示例。

dput(head(employee_data_Imputed,8))
structure(list(ID = structure(c(11L, 36L, 3L, 7L, 11L, 10L, 20L, 
14L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9", 
"10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", 
"21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", 
"32", "33", "34", "35", "36"), class = "factor"), Reason.for.absence = structure(c(26L, 
20L, 23L, 7L, 23L, 22L, 23L, 19L), .Label = c("1", "2", "3", 
"4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", 
"16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", 
"27", "28"), class = "factor"), Month.of.absence = structure(c(7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L), .Label = c("1", "2", "3", "4", "5", 
"6", "7", "8", "9", "10", "11", "12"), class = "factor"), Day.of.the.week = structure(c(2L, 
2L, 3L, 4L, 4L, 5L, 5L, 1L), .Label = c("2", "3", "4", "5", "6"
), class = "factor"), Seasons = structure(c(1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("1", "2", "3", "4"), class = "factor"), 
    Transportation.expense = c(289, 118, 179, 279, 289, 361, 
    260, 155), Distance.from.Residence.to.Work = c(36, 13, 51, 
    5, 36, 52, 50, 12), Service.time = c(13, 18, 18, 14, 13, 
    3, 11, 14), Age = c(33, 50, 38, 39, 33, 28, 36, 34), Work.load.Average.day = c(239554, 
    239554, 239554, 239554, 239554, 239554, 239554, 239554), 
    Hit.target = c(97, 97, 97, 97, 97, 97, 97, 97), Disciplinary.failure = structure(c(1L, 
    2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"), 
    Education = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1", 
    "2", "3", "4"), class = "factor"), Son = c(2, 1, 0, 2, 2, 
    1, 4, 2), Social.drinker = structure(c(2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L), .Label = c("0", "1"), class = "factor"), Social.smoker = structure(c(1L, 
    1L, 1L, 2L, 1L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"), 
    Pet = c(1, 0, 0, 0, 1, 4, 0, 0), Weight = c(90, 98, 89, 68, 
    90, 80, 65, 95), Height = c(172, 178, 170, 168, 172, 172, 
    168, 196), Body.mass.index = c(30, 31, 31, 24, 30, 27, 23, 
    25), Absenteeism.time.in.hours = c(4, 0, 2, 4, 2, 8, 4, 40
    )), .Names = c("ID", "Reason.for.absence", "Month.of.absence", 
"Day.of.the.week", "Seasons", "Transportation.expense", "Distance.from.Residence.to.Work", 
"Service.time", "Age", "Work.load.Average.day", "Hit.target", 
"Disciplinary.failure", "Education", "Son", "Social.drinker", 
"Social.smoker", "Pet", "Weight", "Height", "Body.mass.index", 
"Absenteeism.time.in.hours"), row.names = c(NA, 8L), class = "data.frame")

Answer 1

这是需要更改的部分

dataset %>%
    count(!!variable) %>%
    ggplot(mapping = aes_(x = rlang::quo_expr(variable), y = quote(n), fill = rlang::quo_expr(variable))) +
    ...

您需要在count()中转义“变量”，并且不要将!!与quo_expr一起使用，并且在使用aes_()时需要引用所有参数。

此代码和测试数据已绘制完毕。

使用ggplot inR在函数中未标识的列名

1 个答案: