如何在mutate_at中使用多个函数(每个函数都有各自的参数)?

时间:2018-06-26 21:50:12

标签: r dplyr

我想获取R中多个列的多个滞后值。

如何使用mutate_at获得与下面代码相同的结果?假设实际数据有30列,那么为每个滞后期把滞后公式手写30遍显然不现实。

# Sample data: a time index plus two character columns (26 rows).
df <- tibble(time_col = 1:26, col_1 = letters, col_2 = rev(letters))

# Desired result, written out by hand: the 1- and 2-step lag of each column.
# dplyr::lag() takes its ordering column through `order_by`; it has no `by`
# argument.
df %>% mutate(col_1_lag_1 = lag(col_1, n = 1, order_by = time_col),
              col_2_lag_1 = lag(col_2, n = 1, order_by = time_col),
              col_1_lag_2 = lag(col_1, n = 2, order_by = time_col),
              col_2_lag_2 = lag(col_2, n = 2, order_by = time_col))

我认为应该是这样,但我不知道如何指定两组参数:

# Sample data: a time index plus two character columns (26 rows).
df <- data_frame(time_col = 1:26, col_1 = letters, col_2 = rev(letters))

# Attempted shortcut, shown to illustrate the question (not a working
# solution): arguments placed after funs() are passed on to every function
# call alike, so `n = 1` and `n = 2` cannot be routed to two separate lag()
# calls this way.
# NOTE(review): dplyr::lag() documents `order_by` for the ordering column;
# `by` is not one of its named arguments -- confirm the intended argument.
df %>% mutate_at(vars(col_1, col_2), funs(lag, lag), n = 1, n = 2, by = time_col)

2 个答案:

答案 0 :(得分:2)

借助purrr的解决方案。

library(dplyr)
library(purrr)

# Sample data: a time index plus two character columns (26 rows).
df <- tibble(time_col = 1:26, col_1 = letters, col_2 = rev(letters))

# Lags to compute; extend this vector to add further lagged copies.
lags <- 1:2

# For each lag k, build a tibble holding the k-step lag of every column whose
# name starts with "col", and suffix those columns with "_lag_<k>". Deriving
# the names from the selected columns (rather than hard-coding them) keeps the
# code valid for any number of columns and lags.
map_dfc(lags, function(k) {
  df %>%
    # dplyr::lag() orders via `order_by`; it has no `by` argument.
    transmute_at(vars(starts_with("col")),
                 funs(lag(., n = k, order_by = time_col))) %>%
    set_names(paste0(names(.), "_lag_", k))
}) %>%
  # Put the original columns first, followed by all lagged columns.
  bind_cols(df, .)
# # A tibble: 26 x 7
#    time_col col_1 col_2 col_1_lag_1 col_2_lag_1 col_1_lag_2 col_2_lag_2
#       <int> <chr> <chr> <chr>       <chr>       <chr>       <chr>      
#  1        1 a     z     NA          NA          NA          NA         
#  2        2 b     y     a           z           NA          NA         
#  3        3 c     x     b           y           a           z          
#  4        4 d     w     c           x           b           y          
#  5        5 e     v     d           w           c           x          
#  6        6 f     u     e           v           d           w          
#  7        7 g     t     f           u           e           v          
#  8        8 h     s     g           t           f           u          
#  9        9 i     r     h           s           g           t          
# 10       10 j     q     i           r           h           s          
# # ... with 16 more rows

答案 1 :(得分:1)

这是另一种解决方案,使用嵌套的purrr::map_dfc和准引用(quasiquotation)语法:

# Outer map: one pass per source column; inner map: one pass per lag.
# Each inner call returns a one-column tibble named "<column>_lag_<n>", and
# map_dfc() column-binds the pieces, so all lags of col_1 come before the
# lags of col_2.
bind_cols(
  df,
  map_dfc(c("col_1", "col_2"), function(col_name) {
    map_dfc(c(1, 2), function(n_lag) {
      df %>%
        transmute(
          # `!!name :=` sets the output column name from a string;
          # `!!rlang::sym()` turns the input column name into a symbol.
          # dplyr::lag() orders via `order_by`; it has no `by` argument.
          !!paste0(col_name, "_lag_", n_lag) :=
            lag(!!rlang::sym(col_name), n = n_lag, order_by = time_col)
        )
    })
  })
)
## A tibble: 26 x 7
#   time_col col_1 col_2 col_1_lag_1 col_1_lag_2 col_2_lag_1 col_2_lag_2
#      <int> <chr> <chr> <chr>       <chr>       <chr>       <chr>
# 1        1 a     z     NA          NA          NA          NA
# 2        2 b     y     a           NA          z           NA
# 3        3 c     x     b           a           y           z
# 4        4 d     w     c           b           x           y
# 5        5 e     v     d           c           w           x
# 6        6 f     u     e           d           v           w
# 7        7 g     t     f           e           u           v
# 8        8 h     s     g           f           t           u
# 9        9 i     r     h           g           s           t
#10       10 j     q     i           h           r           s
## ... with 16 more rows