我正在尝试把自己的 summarise + group_by 代码封装成一个函数。这是我正在处理的数据集,包含 id、Country(国家/地区)以及 Apple(苹果)、Orange(橘子)、Jackfruit(波罗蜜)、Banana(香蕉)这几列(如果该人喜欢这种水果,则取值为 1;如果不喜欢,则取值为 0)。
这是我目前使用的函数代码:
# Question's attempt (reported to fail with an error message).
# Intent, as far as the code shows: count rows for each (x, y) combination,
# label the 0/1 indicator as "dislike"/"like", spread the counts into one
# column per label, and divide each of those columns by its column total.
#
# x: grouping column; y: 0/1 indicator column (both passed as bare names).
summarize_pct<-function(x,y){
# NOTE(review): `sample` is not a parameter; it is looked up outside the
# function, so the data frame cannot be swapped by the caller.
sample %>%
# NOTE(review): x and y are used here as plain names rather than embraced
# arguments; this is presumably the source of the reported error -- confirm.
group_by(x, y) %>%
dplyr::summarise(perc = n()) %>%
# NOTE(review): the new column name `Apple` is hard-coded and does not follow
# whichever column is passed as y.
mutate(Apple = c("dislike", "like")[y + 1]) %>%
tidyr::spread(y, perc, fill = 0) %>%
ungroup() %>%
mutate_at(vars(dislike, like), ~./sum(.))
# NOTE(review): the pipeline result above is never assigned, and no local
# object named `summary` is created in this function, so this returns whatever
# `summary` resolves to outside the function, not the computed table.
return(summary)
}
# Call with bare column names for the grouping and indicator columns.
summarize_pct(Country,Apple)
但是我收到了错误消息。...
这是我正在处理的数据:
# Example data as a structure() literal: a tibble (class "tbl_df") with 30 rows
# and columns id, Country, Apple, Banana, Orange, Jackfruit. The fruit columns
# hold integer 0/1 indicators.
# NOTE(review): the literal is not assigned to a name here; the function calls
# in this post refer to a data frame called `sample` -- presumably this object.
structure(list(id = 1:30, Country = c("USA", "USA", "USA", "USA",
"USA", "USA", "USA", "USA", "Germany", "Germany", "Germany",
"Germany", "Germany", "Germany", "UK", "UK", "UK", "UK", "UK",
"UK", "UK", "UK", "UK", "UK", "France", "France", "France", "France",
"France", "France"), Apple = c(1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L,
1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L,
1L, 1L, 1L, 1L, 1L, 1L), Banana = c(1L, 1L, 0L, 1L, 1L, 0L, 0L,
1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L,
1L, 1L, 0L, 0L, 0L, 1L, 1L), Orange = c(0L, 0L, 0L, 0L, 0L, 1L,
1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L), Jackfruit = c(0L, 0L, 1L, 1L,
0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L,
1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L)), row.names = c(NA, -30L
), class = c("tbl_df", "tbl", "data.frame"))
答案 0 :(得分:0)
如果要把这段代码封装成函数,就需要用到非标准求值(non-standard evaluation)。从 rlang 0.4.0 开始,可以使用 curly-curly 运算符 `{{ }}`:
library(dplyr)
library(rlang)
#' Share of "dislike" / "like" responses per group
#'
#' Counts, for every value of `x`, how many rows have `y == 0` ("dislike") and
#' how many have `y == 1` ("like"), then divides each count column by its own
#' column total. Each column therefore sums to 1 across groups.
#'
#' Both result columns are always present, even when `y` contains only 0s or
#' only 1s. A column whose total is zero comes back as `NaN` (0 / 0).
#' Missing values in `y` are not counted in either column.
#'
#' @param df A data frame or tibble.
#' @param x Bare name of the grouping column (e.g. `Country`).
#' @param y Bare name of a 0/1 indicator column (e.g. `Apple`).
#' @return A tibble with one row per value of `x` and the columns `dislike`
#'   and `like`, holding each group's share of the respective column total.
summarize_pct <- function(df, x, y) {
  df %>%
    group_by({{ x }}) %>%
    # Count both outcomes directly so that neither column can go missing when
    # one of the two responses never occurs in the data.
    summarise(
      dislike = sum({{ y }} == 0, na.rm = TRUE),
      like = sum({{ y }} == 1, na.rm = TRUE)
    ) %>%
    # summarise() peels off the single grouping level, so the sums below run
    # over all groups, turning counts into shares of the column total.
    mutate(
      dislike = dislike / sum(dislike),
      like = like / sum(like)
    )
}
# Example: Apple responses by Country. The function divides each of the
# `dislike` and `like` columns by its own column total, so the values are a
# country's share of all dislikes / all likes and each column sums to 1
# (they are not within-country percentages).
summarize_pct(sample, Country,Apple)
# A tibble: 4 x 3
# Country dislike like
# <chr> <dbl> <dbl>
#1 France 0 0.353
#2 Germany 0.308 0.118
#3 UK 0.462 0.235
#4 USA 0.231 0.294
# Same call for a different indicator column; only the second column argument
# changes.
summarize_pct(sample, Country,Orange)
# A tibble: 4 x 3
# Country dislike like
# <chr> <dbl> <dbl>
#1 France 0.2 0.2
#2 Germany 0.133 0.267
#3 UK 0.267 0.4
#4 USA 0.4 0.133