如何在带有多个输入的mutate中使用函数

时间:2019-04-10 03:29:17

标签: r tidyverse

如何在管道中应用函数。

这是我的df

library(tidyverse)
library(lubridate)

# Raw example data: one status per row plus the associated dates, stored as
# day/month/year strings (NA where a date does not apply).
status <- c(
  "exit", "start", "start", "exit", "start",
  "exit", "exit", "suspended", "start"
)
active_date <- c(
  "1/05/2018", "11/10/2017", "1/05/2018", "1/07/2018", "1/07/2018",
  "27/09/2018", "27/09/2018", "27/09/2018", "25/10/2018"
)
start_date <- c(
  "11/10/2017", "11/10/2017", "1/05/2018", "1/05/2018", "1/07/2018",
  "1/07/2018", "1/07/2018", "27/09/2018", "27/09/2018"
)
exit_date <- c(
  "1/05/2018", NA, NA, "1/07/2018", NA,
  "27/09/2018", "27/09/2018", NA, NA
)
suspend_start_date <- c(
  NA, NA, NA, NA, NA,
  "27/09/2018", "27/09/2018", "27/09/2018", "27/09/2018"
)
suspend_end_date <- c(
  NA, NA, NA, NA, NA,
  NA, "25/10/2018", NA, "25/10/2018"
)

# Assemble the columns (active_date is kept aside as the expected result) and
# parse the four date columns (positions 2 to 5) with lubridate::dmy().
df <- tibble(
  status,
  start_date,
  exit_date,
  suspend_start_date,
  suspend_end_date
) %>%
  mutate_at(2:5, .funs = dmy)

这是我的函数

# Pick the date that applies to each status value.
#
# Arguments:
#   x                  status value(s); compared against "exit", "suspended"
#                      and "start". Defaults to a variable named `status`
#                      looked up at call time.
#   exit_date          returned where x is "exit".
#   suspend_start_date returned where x is "suspended".
#   suspend_end_date   returned where x is "start" and this value is not NA.
#   start_date         returned in every other case.
#
# Returns the result of dplyr::case_when() over those rules, evaluated in the
# order listed above (first matching rule wins).
find_active_date <- function(x = status,
                             exit_date,
                             suspend_start_date,
                             suspend_end_date,
                             start_date) {
  is_exit <- x == "exit"
  is_suspended <- x == "suspended"
  # A "start" row with a recorded suspension end resumes on that end date.
  is_resumed <- x == "start" & !is.na(suspend_end_date)

  case_when(
    is_exit ~ exit_date,
    is_suspended ~ suspend_start_date,
    is_resumed ~ suspend_end_date,
    TRUE ~ start_date
  )
}

当我一次输入一个输入时,该函数起作用:

# Call the function on the first row only, passing each value by name.
find_active_date(
  x = df$status[1],
  exit_date = df$exit_date[1],
  suspend_start_date = df$suspend_start_date[1],
  suspend_end_date = df$suspend_end_date[1],
  start_date = df$start_date[1]
)

这是所需的输出

# Expected result: df with the hand-written active_date strings appended as a
# new column and then parsed with lubridate::dmy().
output_df <- as_tibble(cbind(df, active_date))
output_df <- mutate(output_df, active_date = dmy(active_date))

这是我尝试过但不起作用的代码

# Attempt from the question that does not work: find_active_date() declares
# five parameters (x, exit_date, suspend_start_date, suspend_end_date,
# start_date) but only four values are passed positionally here. With
# exit_date omitted, each later value binds to the wrong parameter and
# start_date is left without a value.
df %>%
  rowwise %>%
  mutate(active_date = find_active_date(status, 
                                        suspend_start_date, 
                                        suspend_end_date, 
                                        start_date))

2 个答案:

答案 0 :(得分:2)

我们可以将pmap与reduce一起使用,这样不需要进行任何强制转换/重新转换

# Load the tidyverse (provides %>%, select(), pmap() and reduce()).
library(tidyverse)

# pmap() hands the columns of a data frame to the function by name, so pass
# exactly the columns find_active_date() accepts and expose `status` under the
# parameter name `x`. reduce(c) then concatenates the per-row results into a
# single vector that is stored as the new column.
df$active_date <- df %>%
  select(
    x = status,
    exit_date,
    suspend_start_date,
    suspend_end_date,
    start_date
  ) %>%
  pmap(find_active_date) %>%
  reduce(c)
df
# A tibble: 9 x 6
#  status    start_date exit_date  suspend_start_date suspend_end_date active_date
#  <chr>     <date>     <date>     <date>             <date>           <date>     
#1 exit      2017-10-11 2018-05-01 NA                 NA               2018-05-01 
#2 start     2017-10-11 NA         NA                 NA               2017-10-11 
#3 start     2018-05-01 NA         NA                 NA               2018-05-01 
#4 exit      2018-05-01 2018-07-01 NA                 NA               2018-07-01 
#5 start     2018-07-01 NA         NA                 NA               2018-07-01 
#6 exit      2018-07-01 2018-09-27 2018-09-27         NA               2018-09-27 
#7 exit      2018-07-01 2018-09-27 2018-09-27         2018-10-25       2018-09-27 
#8 suspended 2018-09-27 NA         2018-09-27         NA               2018-09-27 
#9 start     2018-09-27 NA         2018-09-27         2018-10-25       2018-10-25 

或者使用base R中的Map

# Base R alternative. Map() also passes list elements to `f` by name, so build
# an argument list holding only the columns find_active_date() accepts, with
# `status` renamed to the parameter name `x`.
fn_args <- setNames(
  as.list(df[c("status", "exit_date", "suspend_start_date",
               "suspend_end_date", "start_date")]),
  c("x", "exit_date", "suspend_start_date", "suspend_end_date", "start_date")
)
# The inner do.call() runs the function once per row; the outer one
# concatenates the per-row results with c().
do.call(c, do.call(Map, c(f = find_active_date, fn_args)))

注意:在函数中,参数之一被命名为'x'。因此,`status`列的名称也应与该参数名称匹配(例如将该列重命名为`x`)。

注意2:使用这两种解决方案后,都不需要再对结果做任何到Date类的强制转换。

答案 1 :(得分:1)

您的rowwise解决方案有效,但是您缺少exit_date

library(dplyr)

# Row-by-row version of the question's attempt, now passing all five arguments
# (exit_date was the one missing). ungroup() drops the row-wise grouping
# afterwards so later verbs operate on the whole table again.
df %>%
  rowwise() %>%
  mutate(active_date = find_active_date(status,
                                        exit_date,
                                        suspend_start_date,
                                        suspend_end_date,
                                        start_date)) %>%
  ungroup()


# A tibble: 9 x 6
#  status    start_date exit_date  suspend_start_date suspend_end_date active_date
#  <chr>     <date>     <date>     <date>             <date>           <date>     
#1 exit      2017-10-11 2018-05-01 NA                 NA               2018-05-01 
#2 start     2017-10-11 NA         NA                 NA               2017-10-11 
#3 start     2018-05-01 NA         NA                 NA               2018-05-01 
#4 exit      2018-05-01 2018-07-01 NA                 NA               2018-07-01 
#5 start     2018-07-01 NA         NA                 NA               2018-07-01 
#6 exit      2018-07-01 2018-09-27 2018-09-27         NA               2018-09-27 
#7 exit      2018-07-01 2018-09-27 2018-09-27         2018-10-25       2018-09-27 
#8 suspended 2018-09-27 NA         2018-09-27         NA               2018-09-27 
#9 start     2018-09-27 NA         2018-09-27         2018-10-25       2018-10-25 

另一种选择是使用purrr中的pmap_dbl,它以数值形式返回日期,之后可以用as.Date将其转换回日期。

library(dplyr)
library(purrr)

# pmap_dbl() calls find_active_date() once per row and collects the results as
# plain numbers; as.Date() with the 1970-01-01 origin turns them back into
# dates.
df %>%
  mutate(
    active_date = as.Date(
      pmap_dbl(
        list(
          status, exit_date, suspend_start_date,
          suspend_end_date, start_date
        ),
        find_active_date
      ),
      origin = "1970-01-01"
    )
  )