我有一个看起来像这样的数据框
# Toy demographic data: 20 rows, four categorical columns.
df <- data.frame(
  age = rep(c("40-44", "45-49", "50-54", "55-59", "60-64"), times = 4),
  dep = rep(paste0("Dep", 1:5), times = 4),
  ethnic = rep(c("M", "NM"), each = 5, times = 2),
  gender = rep(c("M", "F"), each = 10)
)
我正在尝试为许多类似的数据框生成描述性统计数据,所有数据框都来自不同的来源,因此我可以进行比较。
我把下面的代码写成了一个函数,用来获取性别、性别与种族的组合、种族、dep 和年龄各分组的计数和比例,这样就可以把它应用于多个数据集:
# Build one table of counts and proportions for several groupings of `data`:
# sex, sex x eth, eth, dep and age.
#
# data: a data frame.
# sex, eth, dep, age: intended to be bare (unquoted) column names of `data`.
#
# NOTE: this is the version that fails. The arguments are forwarded to Fun()
# as-is, so quos(...) inside Fun() captures the symbols `sex`, `eth`, ... and
# not the columns the caller supplied; group_by() then looks for a column
# literally named `sex`, which produces the error quoted further down.
Dems_fun <- function(data, sex, eth, dep, age) {
# Helper: count rows per combination of the grouping columns passed in `...`,
# add each count's share of the total, then merge the grouping columns into a
# single label column `dem` joined by "_".
Fun <- function(data, ...) {
group_var <- quos(...)
data %>%
group_by(!!! group_var) %>%
summarise (n = n()) %>%
mutate(freq = n / sum(n)) %>%
unite(dem, !!! group_var, sep = "_", remove = T)
}
# One summary per grouping; `sex`, `eth`, ... reach Fun() as plain symbols.
Sex <- Fun(data, sex)
Sex_eth <- Fun(data, sex, eth)
Eth <- Fun(data, eth)
Dep <- Fun(data, dep)
Age <- Fun(data, age)
# Stack the summaries and give the three columns their final names.
Dems <- rbind(Sex, Sex_eth, Eth, Dep, Age)
colnames(Dems) <- c("Category", "count", "percentage")
return(Dems)
}
当我运行此函数时:
# Columns are passed as bare names; this is the call that raises the error
# quoted below.
test <- Dems_fun(df, gender, ethnic, dep, age)
我收到以下错误消息:
Error in grouped_df_impl(data, unname(vars), drop) : Column `sex` is unknown
谁能告诉我哪里出错?
我已经看到了类似的问题Error with using enquo for creating function with ddplyr,但我无法判断相同的错误是否适用于我的示例。
答案 0 :(得分:2)
您唯一遗漏的是:需要先用 `enquo` 捕获传入函数的列名,之后把它们作为参数传给其他函数时,再用 `!!` 取消引用。也就是说,先调用 `age_var <- enquo(age)`,随后在调用 `Fun` 时传入 `!!age_var`。
library(tidyverse)
# Sample data for the reprex: 20 rows of categorical demographic columns.
df <- data.frame(
  age = rep_len(c("40-44", "45-49", "50-54", "55-59", "60-64"), 20),
  dep = rep_len(sprintf("Dep%d", 1:5), 20),
  ethnic = rep(rep(c("M", "NM"), each = 5), times = 2),
  gender = rep(c("M", "F"), each = 10)
)
#' Tabulate counts and proportions for several demographic groupings.
#'
#' Produces one stacked table covering, in order: sex, sex x eth, eth, dep
#' and age.
#'
#' @param data A data frame.
#' @param sex,eth,dep,age Bare (unquoted) names of columns in `data`.
#' @return A data frame with three columns: `Category` (the group label;
#'   combined groups are joined with "_"), `count` (rows in the group) and
#'   `percentage` (a proportion between 0 and 1, not multiplied by 100).
Dems_fun <- function(data, sex, eth, dep, age) {
  # Capture the caller's column expressions so they can be forwarded to the
  # helper below instead of being looked up as literal names `sex`, `eth`, ...
  sex_var <- enquo(sex)
  eth_var <- enquo(eth)
  dep_var <- enquo(dep)
  age_var <- enquo(age)

  # Count rows per combination of the grouping columns in `...`, compute each
  # count's share, and collapse the grouping columns into one label `dem`.
  Fun <- function(data, ...) {
    group_var <- quos(...)
    data %>%
      group_by(!!!group_var) %>%
      summarise(n = n()) %>%
      # summarise() drops only the last grouping level, so with two grouping
      # columns `freq` is the share within the first one (e.g. within sex).
      mutate(freq = n / sum(n)) %>%
      # Drop any remaining grouping before merging the grouping columns.
      ungroup() %>%
      unite(dem, !!!group_var, sep = "_", remove = TRUE)
  }

  # Unquote the captured columns when handing them to the helper.
  Sex <- Fun(data, !!sex_var)
  Sex_eth <- Fun(data, !!sex_var, !!eth_var)
  Eth <- Fun(data, !!eth_var)
  Dep <- Fun(data, !!dep_var)
  Age <- Fun(data, !!age_var)

  Dems <- rbind(Sex, Sex_eth, Eth, Dep, Age)
  colnames(Dems) <- c("Category", "count", "percentage")
  Dems
}
# Run the summary on the sample data, passing the columns as bare names.
Dems_fun(data = df, sex = gender, eth = ethnic, dep = dep, age = age)
#> # A tibble: 18 x 3
#> Category count percentage
#> <chr> <int> <dbl>
#> 1 F 10 0.5
#> 2 M 10 0.5
#> 3 F_M 5 0.5
#> 4 F_NM 5 0.5
#> 5 M_M 5 0.5
#> 6 M_NM 5 0.5
#> 7 M 10 0.5
#> 8 NM 10 0.5
#> 9 Dep1 4 0.2
#> 10 Dep2 4 0.2
#> 11 Dep3 4 0.2
#> 12 Dep4 4 0.2
#> 13 Dep5 4 0.2
#> 14 40-44 4 0.2
#> 15 45-49 4 0.2
#> 16 50-54 4 0.2
#> 17 55-59 4 0.2
#> 18 60-64 4 0.2
由reprex package(v0.2.0)创建于2018-05-30。