我想为数据添加多个滞后列,并且希望保留完整的滞后序列,而不是让它们被 data_frame 现有的行数截断。
以下是一些用于创建多个滞后的基本代码(HT:https://gist.github.com/drsimonj/2038ff9f9c67063f384f10fac95de566):
# Lag orders to generate, and the suffix used to label each one.
lags <- 1:3
lag_names <- paste0("(Lag ", lags, ")")

# One dplyr::lag() call per lag order, kept as a string and named by its
# suffix; the strings are handed to funs_() below.
lag_functions <- setNames(
  paste("dplyr::lag(., ", lags, ")"),
  lag_names
)

# Example data: an integer index and a column of uniform random draws.
df_foo <- data_frame(
  x = 1:12,
  y = runif(12)
)

# Apply every lag function to both columns. The result keeps the same
# number of rows as df_foo, so the tail of each lagged series is cut off.
df_foo_lag <- mutate_at(
  df_foo,
  .vars = vars("x", "y"),
  .funs = funs_(lag_functions)
)
这给出了:
> df_foo_lag
# A tibble: 12 x 8
x y `x_(Lag 1)` `y_(Lag 1)` `x_(Lag 2)` `y_(Lag 2)` `x_(Lag 3)` `y_(Lag 3)`
<int> <dbl> <int> <dbl> <int> <dbl> <int> <dbl>
1 1 0.847 NA NA NA NA NA NA
2 2 0.966 1 0.847 NA NA NA NA
3 3 0.231 2 0.966 1 0.847 NA NA
4 4 0.324 3 0.231 2 0.966 1 0.847
5 5 0.350 4 0.324 3 0.231 2 0.966
6 6 0.750 5 0.350 4 0.324 3 0.231
7 7 0.415 6 0.750 5 0.350 4 0.324
8 8 0.377 7 0.415 6 0.750 5 0.350
9 9 0.474 8 0.377 7 0.415 6 0.750
10 10 0.108 9 0.474 8 0.377 7 0.415
11 11 0.398 10 0.108 9 0.474 8 0.377
12 12 0.0450 11 0.398 10 0.108 9 0.474
但这不是我想要的。我希望在 data_frame 的底部追加行,以便保留完整的滞后序列:
# what is required
# what is required: three extra rows, written out by hand, that carry the
# tail of each lagged series past the last original row. Each column is
# given as a length-3 vector (one value per appended row); the original
# columns x and y are NA in all three.
df_foo_lag %>%
  add_row(
    x = c(NA, NA, NA),
    y = c(NA, NA, NA),
    `x_(Lag 1)` = c(12, NA, NA),
    `y_(Lag 1)` = c(0.768, NA, NA),
    `x_(Lag 2)` = c(11, 12, NA),
    `y_(Lag 2)` = c(0.307, 0.768, NA),
    `x_(Lag 3)` = c(10, 11, 12),
    `y_(Lag 3)` = c(0.299, 0.307, 0.768)
  )
给出我想要的:
# A tibble: 15 x 8
x y `x_(Lag 1)` `y_(Lag 1)` `x_(Lag 2)` `y_(Lag 2)` `x_(Lag 3)` `y_(Lag 3)`
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.847 NA NA NA NA NA NA
2 2 0.966 1 0.847 NA NA NA NA
3 3 0.231 2 0.966 1 0.847 NA NA
4 4 0.324 3 0.231 2 0.966 1 0.847
5 5 0.350 4 0.324 3 0.231 2 0.966
6 6 0.750 5 0.350 4 0.324 3 0.231
7 7 0.415 6 0.750 5 0.350 4 0.324
8 8 0.377 7 0.415 6 0.750 5 0.350
9 9 0.474 8 0.377 7 0.415 6 0.750
10 10 0.108 9 0.474 8 0.377 7 0.415
11 11 0.398 10 0.108 9 0.474 8 0.377
12 12 0.0450 11 0.398 10 0.108 9 0.474
13 NA NA 12 0.768 11 0.307 10 0.299
14 NA NA NA NA 12 0.768 11 0.307
15 NA NA NA NA NA NA 12 0.768
实现此目标的编程方式是什么?
谢谢。
答案 0 :(得分:1)
一个选项是
library(tidyverse)
# cbind.fill() is provided by the rowr package, not by readr (which
# library(tidyverse) already attaches).
# NOTE(review): rowr may no longer be on CRAN; if so, install it from the
# CRAN archive - confirm before relying on it.
library(rowr)

# For the original data (lag 0) and for each lag order, build a copy of
# df_foo whose columns are meant to have that many NAs prepended, so each
# piece is longer than the previous one.
l1 <- map(c(0, lags), ~ df_foo %>%
  summarise_all(list(~ list(c(rep(NA_real_, .x), .)))) %>%
  unnest)

# Bind the pieces side by side; fill = NA pads the shorter pieces with NA.
res <- do.call(cbind.fill, c(l1, fill = NA))

# The first two columns are the unlagged data; the rest are named
# <column>_<lag label>, in the same order as the pieces were bound.
names(res)[-(1:2)] <- paste(names(df_foo),
  rep(lag_names, each = ncol(df_foo)), sep = "_")
答案 1 :(得分:1)
您可以在计算滞后之前简单地添加行:
# generate multiple lags
# Append max(lags) empty rows before lagging, so that even the longest lag
# has room for its final values instead of being cut off at nrow(df_foo).
df_foo_lag <- df_foo %>%
  bind_rows(tibble(.rows = max(lags))) %>%
  mutate_at(
    .vars = vars("x", "y"),
    .funs = funs_(lag_functions)
  )