我在 R 中有以下函数:
#' Load a radiology wait-time extract and derive per-step timing columns.
#'
#' Prompts for a CSV file with `file.choose()`, cleans the column names, keeps
#' rows whose `acc` is not missing, parses the step start/end timestamps and
#' adds elapsed-time and calendar breakdown columns. Rows are then restricted
#' to the requested start months and to non-negative elapsed times, missing
#' `mrn` values are filled from the next non-missing value, and a per-group
#' row count and average time are appended.
#'
#' @param data Not used by the function body; the input is always the file
#'   picked interactively.
#' @param months Month labels to keep. They are matched against the labels
#'   produced by `month(step_start_time_clean, label = TRUE, abbr = TRUE)`.
#'   Defaults to `c("Apr", "May", "Jun")`, the months that used to be
#'   hard-coded.
#' @return A plain data.frame with the selected and derived columns, plus
#'   `proc_count` and `avg_time_per_proc`.
get_rad_wait_time_data <- function(data, months = c("Apr", "May", "Jun")) {
  # Ask the user for the input file; a failed/cancelled dialog yields "".
  input_path <- tryCatch(file.choose(new = TRUE), error = function(e) "")
  if (!nzchar(input_path)) {
    # Fail early instead of handing an empty path to read.csv().
    stop("No input file was selected.", call. = FALSE)
  }

  # Accept a character vector, a factor or a list of labels.
  months <- as.character(unlist(months))

  # Read the file in and clean the column names.
  raw <- read.csv(input_path) %>%
    clean_names()

  # Keep the relevant columns and derive the timing / calendar fields.
  timed <- raw %>%
    filter(!is.na(acc)) %>%
    select(
      mrn,
      step_start_time,
      step_end_time,
      step_from_to,
      wait_time
    ) %>%
    mutate(
      step_start_time_clean = mdy_hms(step_start_time),
      step_end_time_clean = mdy_hms(step_end_time),
      elapsed_time = difftime(
        step_end_time_clean,
        step_start_time_clean,
        units = "mins"
      ),
      elapsed_time_int = as.integer(elapsed_time),
      procedure_start_year = year(step_start_time_clean),
      procedure_start_month = month(step_start_time_clean),
      procedure_start_month_name = month(
        step_start_time_clean,
        label = TRUE,
        abbr = TRUE
      ),
      procedure_start_day = day(step_start_time_clean),
      procedure_start_dow = wday(
        step_start_time_clean,
        label = TRUE,
        abbr = TRUE
      ),
      procedure_start_hour = hour(step_start_time_clean),
      procedure_end_year = year(step_end_time_clean),
      procedure_end_month = month(step_end_time_clean),
      procedure_end_month_name = month(
        step_end_time_clean,
        label = TRUE,
        abbr = TRUE
      ),
      procedure_end_day = day(step_end_time_clean),
      procedure_end_dow = wday(
        step_end_time_clean,
        label = TRUE,
        abbr = TRUE
      ),
      procedure_end_hour = hour(step_end_time_clean)
    ) %>%
    filter(procedure_start_month_name %in% months) %>%
    filter(elapsed_time_int >= 0)

  # Fill missing mrn values only after filtering, so the fill sees the same
  # row neighbours as before; fromLast = TRUE takes the next non-missing value
  # and na.rm = FALSE keeps the row count unchanged.
  timed %>%
    mutate(mrn = na.locf(mrn, fromLast = TRUE, na.rm = FALSE)) %>%
    group_by(
      mrn,
      step_start_time_clean,
      step_end_time_clean
    ) %>%
    mutate(
      proc_count = n(),
      avg_time_per_proc = round(elapsed_time_int / proc_count, 2)
    ) %>%
    ungroup() %>%
    as.data.frame()
}
我希望函数的形式类似于 `get_rad_wait_time_data(data, months)`,
这样我就可以传入 `months = c("Jan", "Feb", "Mar")`。
我不知道接下来该怎么做。
答案 0 :(得分:0)
我把函数改成了下面这样,看起来可以正常工作:
#' Load a radiology wait-time extract for the requested start months.
#'
#' Prompts for a CSV file with `file.choose()`, cleans the column names, keeps
#' rows whose `acc` is not missing, parses the step start/end timestamps and
#' adds elapsed-time and calendar breakdown columns. Rows are then restricted
#' to the start months given in `months` and to non-negative elapsed times,
#' missing `mrn` values are filled from the next non-missing value, and a
#' per-group row count and average time are appended.
#'
#' @param data Not used by the function body; the input is always the file
#'   picked interactively.
#' @param months Month labels to keep, e.g. `c("Jan", "Feb", "Mar")`. They are
#'   matched against the labels produced by
#'   `month(step_start_time_clean, label = TRUE, abbr = TRUE)`. A character
#'   vector, factor or list is accepted. Defaults to `c("Apr", "May", "Jun")`.
#' @return A plain data.frame with the selected and derived columns, plus
#'   `proc_count` and `avg_time_per_proc`.
get_rad_wait_time_data <- function(data, months = c("Apr", "May", "Jun")) {
  # Ask the user for the input file; a failed/cancelled dialog yields "".
  chosen_file <- tryCatch(file.choose(new = TRUE), error = function(e) "")
  if (!nzchar(chosen_file)) {
    # Fail early instead of handing an empty path to read.csv().
    stop("No input file was selected.", call. = FALSE)
  }

  # Normalise to a plain character vector; %in% does not need a list.
  month_labels <- as.character(unlist(months))

  # Read the file in and clean the column names.
  steps <- read.csv(chosen_file) %>%
    clean_names()

  # Keep the relevant columns and derive the timing / calendar fields.
  steps_clean <- steps %>%
    filter(!is.na(acc)) %>%
    select(
      mrn,
      step_start_time,
      step_end_time,
      step_from_to,
      wait_time
    ) %>%
    mutate(
      step_start_time_clean = mdy_hms(step_start_time),
      step_end_time_clean = mdy_hms(step_end_time),
      elapsed_time = difftime(
        step_end_time_clean,
        step_start_time_clean,
        units = "mins"
      ),
      elapsed_time_int = as.integer(elapsed_time),
      procedure_start_year = year(step_start_time_clean),
      procedure_start_month = month(step_start_time_clean),
      procedure_start_month_name = month(
        step_start_time_clean,
        label = TRUE,
        abbr = TRUE
      ),
      procedure_start_day = day(step_start_time_clean),
      procedure_start_dow = wday(
        step_start_time_clean,
        label = TRUE,
        abbr = TRUE
      ),
      procedure_start_hour = hour(step_start_time_clean),
      procedure_end_year = year(step_end_time_clean),
      procedure_end_month = month(step_end_time_clean),
      procedure_end_month_name = month(
        step_end_time_clean,
        label = TRUE,
        abbr = TRUE
      ),
      procedure_end_day = day(step_end_time_clean),
      procedure_end_dow = wday(
        step_end_time_clean,
        label = TRUE,
        abbr = TRUE
      ),
      procedure_end_hour = hour(step_end_time_clean)
    ) %>%
    filter(procedure_start_month_name %in% month_labels) %>%
    filter(elapsed_time_int >= 0)

  # Fill missing mrn values only after filtering, so the fill sees the same
  # row neighbours as before; fromLast = TRUE takes the next non-missing value
  # and na.rm = FALSE keeps the row count unchanged.
  steps_clean %>%
    mutate(mrn = na.locf(mrn, fromLast = TRUE, na.rm = FALSE)) %>%
    group_by(
      mrn,
      step_start_time_clean,
      step_end_time_clean
    ) %>%
    mutate(
      proc_count = n(),
      avg_time_per_proc = round(elapsed_time_int / proc_count, 2)
    ) %>%
    ungroup() %>%
    as.data.frame()
}