我正在尝试使用我的数据帧替换中的一列创建一个函数,因为我也需要其他列的类似计算。
numeric_fun<-function(dataset,grp_var,var){
require("dplyr")
dataset%>%select(grp_var,var)%>% group_by_(grp_var)%>%
summarize(
'q25' = quantile(var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[2],
'median' =round(quantile(var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[3],0),
'avg' = round( mean(var, na.rm=TRUE), 0),
'q75' = quantile(var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[4] ,
'n' = sum(!is.na(var))
)%>%
mutate(
q25 = ifelse( n < 5, "--", paste(q25,"%" )),
median = ifelse( n < 5, "--", paste(median,"%")),
avg = ifelse(n < 5, "--", paste(avg,"%")),
q75 = ifelse( n < 5, "--", paste(q75,"%")),
n = n
) %>%
rename( "Industry"=grp_var,
"25%" = q25,
"75%" = q75
)
}
numeric_fun(outplacement,"Q7_1","Q8")
但这些不起作用,还有其他任何有效方法吗?
输出
structure(list(Q7_1 = structure(c(NA, NA, NA, 5L, 5L, 14L, NA,
1L, 9L, 13L, 1L, NA, 3L, 13L, 13L, 13L, 12L, 2L, 11L, 13L, 10L,
11L, NA, 1L, 4L, NA, 5L, NA, 5L, 4L, 5L, 9L, 2L, 4L, 13L, 10L,
13L, 13L, NA, 11L, NA, 1L, 11L, NA, 5L, NA, 1L, NA, 9L, 3L, 1L,
1L, 10L, 1L, NA, 5L, NA, NA, 2L, NA, 6L, 6L, NA, 13L, 14L, NA,
NA, 14L, 8L, 11L, 11L, 8L, 12L, 13L, NA, 3L, 11L, 3L, 11L, 1L,
NA, 9L, NA, 10L, 6L, 1L, 5L, 3L, 1L, 13L, 4L, 14L, NA, 13L), .Label = c("Banking/Financial Services",
"Chemicals", "Consumer Goods", "Energy", "High Tech", "Insurance/Reinsurance",
"Life Sciences", "Logistics", "Mining & Metals", "Other Manufacturing",
"Other Non-Manufacturing", "Retail & Wholesale", "Services (Non-Financial)", "Transportation Equipment"), class = "factor"), Q8 = c(NA, NA,
NA, NA, NA, NA, NA, 2L, 3L, 3L, 6L, NA, 5L, 4L, 2L, 5L, 6L, 2L,
2L, 3L, 2L, 5L, NA, 3L, 1L, NA, 3L, NA, 1L, 3L, 4L, 4L, 2L, 4L,
1L, 3L, 2L, 3L, NA, 2L, NA, 4L, 4L, NA, 1L, NA, 3L, NA, 1L, 3L,
5L, 2L, 3L, 1L, NA, 6L, NA, NA, 4L, NA, 1L, 5L, NA, 2L, 1L, NA,
NA, 2L, 6L, 6L, 2L, 6L, 3L, 5L, NA, 5L, 2L, 1L, 3L, 3L, NA, 3L,
NA, 3L, 3L, 6L, 4L, 1L, 4L, 6L, 3L, 5L, NA, 5L), Q9 = c(3L, 1L,
NA, 1L, 3L, 3L, NA, 3L, 3L, 1L, 1L, NA, 3L, 2L, 2L, 3L, 2L, 3L,
2L, 2L, 2L, 1L, NA, 3L, 1L, NA, 1L, NA, 1L, 2L, 1L, 2L, 3L, 1L,
1L, 1L, 3L, 3L, NA, 3L, NA, 2L, 2L, NA, 1L, NA, 1L, NA, 1L, 2L,
2L, 1L, 2L, 3L, NA, 1L, NA, NA, 2L, NA, 2L, 2L, NA, 2L, 2L, NA,
NA, 1L, 3L, 1L, 3L, 3L, 1L, 3L, NA, 1L, 3L, 1L, 1L, 3L, NA, 1L,
NA, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 1L, NA, 2L), Q10 = c(NA,
1L, NA, 1L, NA, NA, NA, NA, NA, 1L, 1L, NA, NA, 1L, 2L, NA, 1L,
NA, 1L, 1L, 2L, 2L, NA, NA, 2L, NA, 2L, NA, 2L, 1L, NA, 1L, NA,
1L, 1L, 1L, NA, NA, NA, NA, NA, 2L, 1L, NA, 1L, NA, 2L, NA, 2L,
2L, 2L, 1L, 2L, 2L, NA, 1L, NA, NA, 2L, NA, 2L, 1L, NA, 1L, 2L,
NA, NA, 1L, 1L, NA, 1L, NA, NA, 2L, NA, NA, 1L, 1L, 1L, 2L, NA,
1L, NA, 1L, 2L, 2L, 1L, 1L, NA, 1L, NA, 2L, NA, 1L)), row.names = c(NA,
-94L), class = c("data.table", "data.frame"), .internal.selfref = <pointer:
0x0000000000090788>, .Names = c("Q7_1",
"Q8", "Q9", "Q10"))
答案 0 :(得分:2)
问题是代码在不适当的上下文中使用了字符串。我们可以使用sym
和!!从rlang包中翻译它们。添加标有##
的语句,然后使用!!grp_var
和!!var
代替grp_var
和var
。还将group_by_
更改为group_by
,重新格式化,将require
更改为library
(请参阅下一段)并为data.table和rlang添加library
语句。 / p>
请注意library
优于require
,除非在if
内。这样,如果包丢失,它将在library
语句失败,使原因显而易见。另一方面,对于require
,它将进一步失败,使其更难调试。
library(data.table) ##
library(dplyr)
numeric_fun <- function(dataset, grp_var, var) {
grp_var <- sym(grp_var)
var <- sym(var)
dataset %>%
select(!!grp_var,!!var) %>%
group_by(!!grp_var) %>%
summarize(
'q25' = quantile(!!var, type=6, probs = seq(0, 1, 0.25), na.rm = TRUE)[2],
'median' = round(quantile(!!var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[3],0),
'avg' = round( mean(!!var, na.rm = TRUE), 0),
'q75' = quantile(!!var, type=6, probs = seq(0, 1, 0.25), na.rm = TRUE)[4] ,
'n' = sum(!is.na(!!var))
) %>%
mutate(
q25 = ifelse( n < 5, "--", paste(q25, "%" )),
median = ifelse( n < 5, "--", paste(median, "%")),
avg = ifelse(n < 5, "--", paste(avg, "%")),
q75 = ifelse( n < 5, "--", paste(q75, "%")),
n = n
) %>%
rename( "Industry" = !!grp_var,
"25%" = q25,
"75%" = q75
)
}
numeric_fun(outplacement,"Q7_1","Q8")
,并提供:
# A tibble: 14 x 6
Industry `25%` median avg `75%` n
<fctr> <chr> <chr> <chr> <chr> <int>
1 Banking/Financial Services 2 % 3 % 4 % 5 % 11
2 Chemicals -- -- -- -- 3
3 Consumer Goods 1 % 3 % 3 % 5 % 5
4 Energy -- -- -- -- 4
5 High Tech 1 % 4 % 3 % 4.5 % 6
6 Insurance/Reinsurance -- -- -- -- 3
7 Logistics -- -- -- -- 2
8 Mining & Metals -- -- -- -- 4
9 Other Manufacturing -- -- -- -- 4
10 Other Non-Manufacturing 2 % 2 % 3 % 4.75 % 8
11 Retail & Wholesale -- -- -- -- 2
12 Services (Non-Financial) 2 % 3 % 3 % 5 % 12
13 Transportation Equipment -- -- -- -- 3
14 <NA> -- -- -- -- 0
dput
不适用于具有外部指针的对象,例如data.table对象,因此我们使用了这个:
outplacement <-
structure(list(Q7_1 = structure(c(NA, NA, NA, 5L, 5L, 14L, NA,
1L, 9L, 13L, 1L, NA, 3L, 13L, 13L, 13L, 12L, 2L, 11L, 13L, 10L,
11L, NA, 1L, 4L, NA, 5L, NA, 5L, 4L, 5L, 9L, 2L, 4L, 13L, 10L,
13L, 13L, NA, 11L, NA, 1L, 11L, NA, 5L, NA, 1L, NA, 9L, 3L, 1L,
1L, 10L, 1L, NA, 5L, NA, NA, 2L, NA, 6L, 6L, NA, 13L, 14L, NA,
NA, 14L, 8L, 11L, 11L, 8L, 12L, 13L, NA, 3L, 11L, 3L, 11L, 1L,
NA, 9L, NA, 10L, 6L, 1L, 5L, 3L, 1L, 13L, 4L, 14L, NA, 13L), .Label = c("Banking/Financial Services",
"Chemicals", "Consumer Goods", "Energy", "High Tech", "Insurance/Reinsurance",
"Life Sciences", "Logistics", "Mining & Metals", "Other Manufacturing",
"Other Non-Manufacturing", "Retail & Wholesale", "Services (Non-Financial)", "Transportation Equipment"), class = "factor"), Q8 = c(NA, NA,
NA, NA, NA, NA, NA, 2L, 3L, 3L, 6L, NA, 5L, 4L, 2L, 5L, 6L, 2L,
2L, 3L, 2L, 5L, NA, 3L, 1L, NA, 3L, NA, 1L, 3L, 4L, 4L, 2L, 4L,
1L, 3L, 2L, 3L, NA, 2L, NA, 4L, 4L, NA, 1L, NA, 3L, NA, 1L, 3L,
5L, 2L, 3L, 1L, NA, 6L, NA, NA, 4L, NA, 1L, 5L, NA, 2L, 1L, NA,
NA, 2L, 6L, 6L, 2L, 6L, 3L, 5L, NA, 5L, 2L, 1L, 3L, 3L, NA, 3L,
NA, 3L, 3L, 6L, 4L, 1L, 4L, 6L, 3L, 5L, NA, 5L), Q9 = c(3L, 1L,
NA, 1L, 3L, 3L, NA, 3L, 3L, 1L, 1L, NA, 3L, 2L, 2L, 3L, 2L, 3L,
2L, 2L, 2L, 1L, NA, 3L, 1L, NA, 1L, NA, 1L, 2L, 1L, 2L, 3L, 1L,
1L, 1L, 3L, 3L, NA, 3L, NA, 2L, 2L, NA, 1L, NA, 1L, NA, 1L, 2L,
2L, 1L, 2L, 3L, NA, 1L, NA, NA, 2L, NA, 2L, 2L, NA, 2L, 2L, NA,
NA, 1L, 3L, 1L, 3L, 3L, 1L, 3L, NA, 1L, 3L, 1L, 1L, 3L, NA, 1L,
NA, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 1L, NA, 2L), Q10 = c(NA,
1L, NA, 1L, NA, NA, NA, NA, NA, 1L, 1L, NA, NA, 1L, 2L, NA, 1L,
NA, 1L, 1L, 2L, 2L, NA, NA, 2L, NA, 2L, NA, 2L, 1L, NA, 1L, NA,
1L, 1L, 1L, NA, NA, NA, NA, NA, 2L, 1L, NA, 1L, NA, 2L, NA, 2L,
2L, 2L, 1L, 2L, 2L, NA, 1L, NA, NA, 2L, NA, 2L, 1L, NA, 1L, 2L,
NA, NA, 1L, 1L, NA, 1L, NA, NA, 2L, NA, NA, 1L, 1L, 1L, 2L, NA,
1L, NA, 1L, 2L, 2L, 1L, 1L, NA, 1L, NA, 2L, NA, 1L)), row.names = c(NA,
-94L), class = "data.frame", .Names = c("Q7_1", "Q8", "Q9", "Q10"))
library(data.table)
outplacement <- as.data.table(outplacement)
答案 1 :(得分:2)
使用dplyr
中的library(dplyr)
numeric_fun<-function(dataset,grp_var,var){
grp_var <- enquo(grp_var)
var <- enquo(var)
dataset %>%
select(!! grp_var, !!var) %>%
group_by(!! grp_var) %>%
summarise(
q25 = quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[2],
median =round(quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[3],0),
avg = round( mean(!! var, na.rm=TRUE), 0),
q75 = quantile(!! var, type=6, probs = seq(0, 1, 0.25), na.rm=TRUE)[4] ,
n = sum(!is.na(!!var))) %>%
mutate(
q25 = ifelse( n < 5, "--", paste0(q25,"%" )),
median = ifelse( n < 5, "--", paste0(median,"%")),
avg = ifelse(n < 5, "--", paste0(avg,"%")),
q75 = ifelse( n < 5, "--", paste0(q75,"%"))
) %>%
rename(Industry= !!grp_var,
`25%` = q25,
`75%` = q75
)
}
我们可以将该功能重新编码为
numeric_fun(df1, Q7_1, Q8)
# A tibble: 14 x 6
# Industry `25%` median avg `75%` n
# <fctr> <chr> <chr> <chr> <chr> <int>
# 1 Banking/Financial Services 2% 3% 4% 5% 11
# 2 Chemicals -- -- -- -- 3
# 3 Consumer Goods 1% 3% 3% 5% 5
# 4 Energy -- -- -- -- 4
# 5 High Tech 1% 4% 3% 4.5% 6
# 6 Insurance/Reinsurance -- -- -- -- 3
# 7 Logistics -- -- -- -- 2
# 8 Mining & Metals -- -- -- -- 4
# 9 Other Manufacturing -- -- -- -- 4
#10 Other Non-Manufacturing 2% 2% 3% 4.75% 8
#11 Retail & Wholesale -- -- -- -- 2
#12 Services (Non-Financial) 2% 3% 3% 5% 12
#13 Transportation Equipment -- -- -- -- 3
#14 <NA> -- -- -- -- 0
- 运行功能
postgres