我想获取R中多个列的多个滞后值。
如何使用 mutate_at 得到与下面代码相同的结果?假设实际数据有 30 列,为每个滞后期把滞后公式重复写 30 遍显然不合理。
# Reference result: add lag-1 and lag-2 copies of col_1 and col_2, with rows
# ordered by time_col.
# tibble() replaces the deprecated data_frame() constructor.
df <- tibble(time_col = 1:26, col_1 = letters, col_2 = rev(letters))
# dplyr::lag() has no `by` argument; `order_by` is the argument that sets the
# ordering used for the lag.
df %>%
  mutate(
    col_1_lag_1 = lag(col_1, n = 1, order_by = time_col),
    col_2_lag_1 = lag(col_2, n = 1, order_by = time_col),
    col_1_lag_2 = lag(col_1, n = 2, order_by = time_col),
    col_2_lag_2 = lag(col_2, n = 2, order_by = time_col)
  )
我认为应该是这样,但我不知道如何指定两组参数:
# Asker's attempted generalisation with mutate_at(); kept exactly as posted
# because it illustrates the question rather than a working solution.
# NOTE(review): `n` is supplied twice (n = 1, n = 2) and funs(lag, lag) lists
# the same function twice, so this presumably cannot produce one set of lagged
# columns per `n` -- confirm by running.
df <- data_frame(time_col = 1:26, col_1 = letters, col_2 = rev(letters))
df %>% mutate_at(vars(col_1, col_2), funs(lag, lag), n = 1, n = 2, by = time_col)
答案 0 :(得分:2)
借助 purrr 的解决方案。
library(dplyr)
library(purrr)

# tibble() replaces the deprecated data_frame() constructor.
df <- tibble(time_col = 1:26, col_1 = letters, col_2 = rev(letters))

# Build one tibble of lagged columns per lag distance k. The columns are named
# "<col>_lag_<k>" as they are created, so no positional renaming is needed
# after binding.
lagged <- map(1:2, function(k) {
  df %>%
    transmute(across(
      starts_with("col"),
      # `order_by` (not `by`) is the dplyr::lag() argument that sets the
      # ordering used for the lag.
      ~ lag(.x, n = k, order_by = time_col),
      .names = paste0("{.col}_lag_", k)
    ))
})

# Append the lagged columns to the original data.
bind_cols(df, lagged)
# # A tibble: 26 x 7
# time_col col_1 col_2 col_1_lag_1 col_2_lag_1 col_1_lag_2 col_2_lag_2
# <int> <chr> <chr> <chr> <chr> <chr> <chr>
# 1 1 a z NA NA NA NA
# 2 2 b y a z NA NA
# 3 3 c x b y a z
# 4 4 d w c x b y
# 5 5 e v d w c x
# 6 6 f u e v d w
# 7 7 g t f u e v
# 8 8 h s g t f u
# 9 9 i r h s g t
# 10 10 j q i r h s
# # ... with 16 more rows
答案 1 :(得分:1)
这是另一种解决方案,使用 purrr 的嵌套 map_dfc 和准引用(quasiquotation)语法:
# For every value column and every lag distance, build a one-column tibble
# named "<col>_lag_<k>", then bind all of them next to the original data.
# Expects `df` plus dplyr and purrr to be available already.
bind_cols(
  df,
  map_dfc(c("col_1", "col_2"), function(col) {
    map_dfc(1:2, function(k) {
      df %>%
        transmute(
          # `!!name :=` sets the output column name from a string;
          # .data[[col]] looks the input column up by its string name.
          # `order_by` (not `by`) is the dplyr::lag() ordering argument.
          !!paste0(col, "_lag_", k) :=
            lag(.data[[col]], n = k, order_by = time_col)
        )
    })
  })
)
## A tibble: 26 x 7
# time_col col_1 col_2 col_1_lag_1 col_1_lag_2 col_2_lag_1 col_2_lag_2
# <int> <chr> <chr> <chr> <chr> <chr> <chr>
# 1 1 a z NA NA NA NA
# 2 2 b y a NA z NA
# 3 3 c x b a y z
# 4 4 d w c b x y
# 5 5 e v d c w x
# 6 6 f u e d v w
# 7 7 g t f e u v
# 8 8 h s g f t u
# 9 9 i r h g s t
#10 10 j q i h r s
## ... with 16 more rows