dplyr:在扩展 data_frame 行数的同时添加多个滞后列

时间:2019-05-22 16:45:01

标签: r dplyr

我想为数据添加多个滞后列,并且希望保留完整的滞后序列,而不是让它被 data_frame 现有的行数截断。

以下是一些用于创建多个滞后的基本代码(HT:https://gist.github.com/drsimonj/2038ff9f9c67063f384f10fac95de566):

# Example data: an integer index and twelve uniform random draws
df_foo <- data_frame(x = 1:12, y = runif(12))

# One lag call per lag order, kept as text and named after the column
# suffix it should produce
lags <- 1:3
lag_names <- paste0("(Lag ", lags, ")")
lag_functions <- paste("dplyr::lag(., ", lags, ")")
names(lag_functions) <- lag_names

# Apply every lag expression to both columns; the new columns are named
# <column>_<lag name>
df_foo_lag <- mutate_at(df_foo, vars("x", "y"), funs_(lag_functions))

这给出了:

> df_foo_lag
# A tibble: 12 x 8
       x      y `x_(Lag 1)` `y_(Lag 1)` `x_(Lag 2)` `y_(Lag 2)` `x_(Lag 3)` `y_(Lag 3)`
   <int>  <dbl>       <int>       <dbl>       <int>       <dbl>       <int>       <dbl>
 1     1 0.847           NA      NA              NA      NA              NA      NA    
 2     2 0.966            1       0.847          NA      NA              NA      NA    
 3     3 0.231            2       0.966           1       0.847          NA      NA    
 4     4 0.324            3       0.231           2       0.966           1       0.847
 5     5 0.350            4       0.324           3       0.231           2       0.966
 6     6 0.750            5       0.350           4       0.324           3       0.231
 7     7 0.415            6       0.750           5       0.350           4       0.324
 8     8 0.377            7       0.415           6       0.750           5       0.350
 9     9 0.474            8       0.377           7       0.415           6       0.750
10    10 0.108            9       0.474           8       0.377           7       0.415
11    11 0.398           10       0.108           9       0.474           8       0.377
12    12 0.0450          11       0.398          10       0.108           9       0.474

但这不是我想要的。我希望将行添加到data_frame的底部,以便添加整个滞后序列:

# Desired result: three extra rows at the bottom so that each lagged column
# keeps its trailing values (the numbers are typed in by hand as an
# illustration)
df_foo_lag %>%
  add_row(
    x = NA,
    y = NA,
    `x_(Lag 1)` = c(12, NA, NA),
    `y_(Lag 1)` = c(0.768, NA, NA),
    `x_(Lag 2)` = c(11, 12, NA),
    `y_(Lag 2)` = c(0.307, 0.768, NA),
    `x_(Lag 3)` = c(10, 11, 12),
    `y_(Lag 3)` = c(0.299, 0.307, 0.768)
  )

给出我想要的:

# A tibble: 15 x 8
       x       y `x_(Lag 1)` `y_(Lag 1)` `x_(Lag 2)` `y_(Lag 2)` `x_(Lag 3)` `y_(Lag 3)`
   <int>   <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>       <dbl>
 1     1  0.847           NA      NA              NA      NA              NA      NA    
 2     2  0.966            1       0.847          NA      NA              NA      NA    
 3     3  0.231            2       0.966           1       0.847          NA      NA    
 4     4  0.324            3       0.231           2       0.966           1       0.847
 5     5  0.350            4       0.324           3       0.231           2       0.966
 6     6  0.750            5       0.350           4       0.324           3       0.231
 7     7  0.415            6       0.750           5       0.350           4       0.324
 8     8  0.377            7       0.415           6       0.750           5       0.350
 9     9  0.474            8       0.377           7       0.415           6       0.750
10    10  0.108            9       0.474           8       0.377           7       0.415
11    11  0.398           10       0.108           9       0.474           8       0.377
12    12  0.0450          11       0.398          10       0.108           9       0.474
13    NA NA               12       0.768          11       0.307          10       0.299
14    NA NA               NA      NA              12       0.768          11       0.307
15    NA NA               NA      NA              NA      NA              12       0.768

实现此目标的编程方式是什么?

谢谢。

2 个答案:

答案 0 :(得分:1)

一个选项是

library(tidyverse)

# Largest shift requested; every shifted copy is padded to
# nrow(df_foo) + max_lag rows so the pieces can be bound side by side.
max_lag <- max(lags)

# For each shift k (0 = the original columns) prepend k NAs and append
# max_lag - k NAs to every column. The outer function takes a named argument
# `k` instead of a `~ .x` lambda so the shift cannot be confused with the
# column placeholder `.` of the inner formula.
l1 <- map(c(0, lags), function(k) {
  df_foo %>%
    summarise_all(
      list(~ list(c(rep(NA_real_, k), ., rep(NA_real_, max_lag - k))))
    ) %>%
    unnest()
})

# All pieces now have the same number of rows, so base cbind() is enough
# (the original relied on cbind.fill, which none of the loaded packages
# provide).
res <- do.call(cbind, l1)

# The first two columns keep the original names; the rest get
# <column>_<lag name>.
names(res)[-(1:2)] <- paste(
  names(df_foo),
  rep(lag_names, each = ncol(df_foo)),
  sep = "_"
)

答案 1 :(得分:1)

您可以在计算滞后之前简单地添加行:

# Append max(lags) empty rows first so the lagged columns have room for their
# trailing values, then compute the lags exactly as in the question
df_foo_lag <- df_foo %>%
  bind_rows(tibble(.rows = max(lags))) %>%
  mutate_at(vars("x", "y"), funs_(lag_functions))