我有两个函数:date_diff 和 group_stat。我阅读了关于 tidyverse 的这篇文章,并尝试编写简单的函数,再用管道把它们串联起来。
第一个函数计算一个 difftime,并将结果列命名为 timex_minus_timey;但是当我把这个结果传递给下一个函数时,必须先查看该列的名称,才能填写 summary_var 参数。有没有更好的方法?
library(tidyverse)
#
# Simulate 1000 records for each of two hospitals ("A" and "B").
# The seed is fixed so the example is reproducible; the hospitals are
# generated in order, so the random draws happen in the same sequence
# as when the two tibbles are written out one after the other.
set.seed(42)
data <- dplyr::bind_rows(
  lapply(c("A", "B"), function(hosp) {
    tibble::tibble(
      Hosp = rep(hosp, 1000),
      drg = sample(letters[1:5], 1000, replace = TRUE),
      # time1: base timestamp plus normal noise (sd = 60 * 60 * 60 seconds)
      time1 = as.POSIXct("2018-02-03 08:00:00", tz = "UTC") +
        rnorm(1000, 0, 60 * 60 * 60),
      # time2: between 10 and 20 minutes (expressed in seconds) after time1
      time2 = time1 + runif(1000, min = 10 * 60, max = 20 * 60)
    )
  })
)
#' Add a numeric column holding the difference between two timestamps
#'
#' @param df A data frame containing both timestamp columns.
#' @param stamp1 Bare name of the column to subtract from.
#' @param stamp2 Bare name of the column that is subtracted.
#' @param units Units passed on to `difftime()`; defaults to `"mins"`.
#' @return `df` with one extra numeric column named
#'   `<stamp1>_minus_<stamp2>`.
date_diff <- function(df, stamp1, stamp2, units = "mins") {
  lhs <- rlang::enquo(stamp1)
  rhs <- rlang::enquo(stamp2)

  # The new column is named after the two input columns.
  diff_col <- paste(
    rlang::quo_name(lhs),
    rlang::quo_name(rhs),
    sep = "_minus_"
  )

  dplyr::mutate(
    df,
    !!diff_col := as.numeric(difftime(!!lhs, !!rhs, units = units))
  )
}
#' Summarise one column within groups using a single function
#'
#' @param df A data frame.
#' @param group_var Bare name of the grouping column.
#' @param summary_var Bare name of the column to summarise.
#' @param .f Summary function, or anything `rlang::as_function()` accepts.
#'   It is called with `na.rm = TRUE`.
#' @return One row per group, with the summary stored in a column named
#'   `<summary_var>_<.f as written by the caller>`.
group_stat <- function(df, group_var, summary_var, .f) {
  # Text of the expression supplied for `.f`, used as the column suffix.
  stat_label <- deparse(substitute(.f))
  by_quo <- rlang::enquo(group_var)
  value_quo <- rlang::enquo(summary_var)
  stat_fn <- rlang::as_function(.f)

  out_col <- paste(rlang::quo_name(value_quo), stat_label, sep = "_")

  grouped <- dplyr::group_by(df, !!by_quo)
  dplyr::summarise(grouped, !!out_col := stat_fn(!!value_quo, na.rm = TRUE))
}
# Compute time2 - time1 in minutes (the date_diff() default), then average
# the resulting column per hospital.
data %>%
  date_diff(stamp1 = time2, stamp2 = time1) %>%
  group_stat(group_var = Hosp, summary_var = time2_minus_time1, .f = mean)
#> # A tibble: 2 x 2
#> Hosp time2_minus_time1_mean
#> <chr> <dbl>
#> 1 A 15.1
#> 2 B 14.9
由reprex package(v0.2.1)于2019-05-02创建
答案 0 :(得分:1)
如果您打算始终以这种方式依次使用这两个函数,可以让 date_diff 在其结果上添加一个保存新列名称的属性,并让 group_stat 使用该属性。group_stat 中的 if 条件保证:仅当该属性存在且未提供 summary_var 参数时,才会使用该属性。
#' Add a timestamp-difference column and record its name on the result
#'
#' @param df A data frame containing both timestamp columns.
#' @param stamp1 Bare name of the column to subtract from.
#' @param stamp2 Bare name of the column that is subtracted.
#' @param units Units passed on to `difftime()`; defaults to `"mins"`.
#' @return `df` with one extra numeric column named
#'   `<stamp1>_minus_<stamp2>`. The same name is stored in the
#'   `"date_diff_nm"` attribute of the returned object.
date_diff <- function(df, stamp1, stamp2, units = "mins") {
  minuend <- rlang::enquo(stamp1)
  subtrahend <- rlang::enquo(stamp2)
  new_col <- paste(
    rlang::quo_name(minuend),
    rlang::quo_name(subtrahend),
    sep = "_minus_"
  )

  tagged <- dplyr::mutate(
    df,
    !!new_col := as.numeric(difftime(!!minuend, !!subtrahend, units = units))
  )

  # Record the new column's name so a later step can look it up.
  attr(tagged, "date_diff_nm") <- new_col
  tagged
}
#' Summarise one column within groups, defaulting to the column tagged on `df`
#'
#' @param df A data frame. If it carries a `"date_diff_nm"` attribute, that
#'   attribute names the column summarised when `summary_var` is omitted.
#' @param group_var Bare name of the grouping column.
#' @param summary_var Optional. The column to summarise, given either as a
#'   bare column name or as a single string (or an expression evaluating to
#'   one). When omitted, the `"date_diff_nm"` attribute of `df` is used.
#' @param .f Summary function, or anything `rlang::as_function()` accepts.
#'   It is called with `na.rm = TRUE`.
#' @return One row per group, with the summary stored in a column named
#'   `<summary column>_<.f as written by the caller>`.
group_stat <- function(df, group_var, summary_var, .f) {
  # Suffix comes from the expression the caller wrote for `.f`; collapse so
  # that a multi-line deparse still yields a single column name.
  stat_label <- paste(deparse(substitute(.f)), collapse = "")
  stat_fn <- rlang::as_function(.f)
  group_var <- rlang::enquo(group_var)

  if (missing(summary_var)) {
    # Fall back to the column name recorded on the data frame.
    summary_nm <- attr(df, "date_diff_nm")
    if (is.null(summary_nm)) {
      stop(
        "`summary_var` was not supplied and `df` has no ",
        "'date_diff_nm' attribute.",
        call. = FALSE
      )
    }
  } else {
    summary_quo <- rlang::enquo(summary_var)
    summary_expr <- rlang::quo_get_expr(summary_quo)
    if (is.symbol(summary_expr) &&
        as.character(summary_expr) %in% names(df)) {
      # A bare column name, e.g. summary_var = time2_minus_time1.
      summary_nm <- as.character(summary_expr)
    } else {
      # A string, or a variable/expression that evaluates to one.
      summary_nm <- rlang::eval_tidy(summary_quo)
    }
  }

  if (!is.character(summary_nm) || length(summary_nm) != 1L) {
    stop("`summary_var` must identify exactly one column.", call. = FALSE)
  }

  out_col <- paste0(summary_nm, "_", stat_label)
  grouped <- dplyr::group_by(df, !!group_var)
  dplyr::summarise(
    grouped,
    !!out_col := stat_fn(!!rlang::sym(summary_nm), na.rm = TRUE)
  )
}
# No summary column is named here: group_stat() reads it from the
# "date_diff_nm" attribute that date_diff() attaches to its result.
data %>%
  date_diff(stamp1 = time2, stamp2 = time1) %>%
  group_stat(group_var = Hosp, .f = mean)
# # A tibble: 2 x 2
# Hosp time2_minus_time1_mean
# <chr> <dbl>
# 1 A 15.1
# 2 B 14.9