在所有列上动态应用滞后功能

时间:2018-07-06 11:41:44

标签: r

嗨,我有100列的数据框,我正在尝试计算current_value -lag(current_value)
我可以为单个列计算,但无法动态地在所有可用列上运行代码

Sample df  


     Class <- c("A","A","A","A","B","B","B","C","C","C","C","C","C")
        A<-c(23,33,45,56,22,34,34,45,65,5,57,75,57)
        D<-c(2,133,5,60,23,312,341,25,75,50,3,9,21)
        M<-c(34,35,67,325,46,56,547,47,67,67,68,3,12)

        df <- data.frame(Class,A,D,M)

我尝试使用以下代码

df <- df %>% group_by(Class) %>%
          mutate(A_lag =(A-lag(A)))

帮助我动态计算所有小伙子的列 预先感谢

2 个答案:

答案 0 :(得分:1)

Class <- c("A","A","A","A","B","B","B","C","C","C","C","C","C")
A<-c(23,33,45,56,22,34,34,45,65,5,57,75,57)
D<-c(2,133,5,60,23,312,341,25,75,50,3,9,21)
M<-c(34,35,67,325,46,56,547,47,67,67,68,3,12)

df <- data.frame(Class,A,D,M)

library(dplyr)

df %>% 
  group_by(Class) %>%
  mutate_all(~.-lag(.)) %>%
  ungroup()

# # A tibble: 13 x 4
#   Class     A     D     M
#   <fct> <dbl> <dbl> <dbl>
# 1 A        NA    NA    NA
# 2 A        10   131     1
# 3 A        12  -128    32
# 4 A        11    55   258
# 5 B        NA    NA    NA
# 6 B        12   289    10
# 7 B         0    29   491
# 8 C        NA    NA    NA
# 9 C        20    50    20
#10 C       -60   -25     0
#11 C        52   -47     1
#12 C        18     6   -65
#13 C       -18    12     9

或者如果要将新列添加到现有列中

df %>% 
  group_by(Class) %>%
  mutate_all(funs(new = .-lag(.))) %>%
  ungroup()

# # A tibble: 13 x 7
#   Class     A     D     M A_new D_new M_new
#   <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 A        23     2    34    NA    NA    NA
# 2 A        33   133    35    10   131     1
# 3 A        45     5    67    12  -128    32
# 4 A        56    60   325    11    55   258
# 5 B        22    23    46    NA    NA    NA
# 6 B        34   312    56    12   289    10
# 7 B        34   341   547     0    29   491
# 8 C        45    25    47    NA    NA    NA
# 9 C        65    75    67    20    50    20
#10 C         5    50    67   -60   -25     0
#11 C        57     3    68    52   -47     1
#12 C        75     9     3    18     6   -65
#13 C        57    21    12   -18    12     9

答案 1 :(得分:1)

使用data.table并仅计算数字变量的滞后时间:

library(data.table)
setDT(df)

df[,
   lapply(.SD, function(x) x - shift(x)), 
   by = Class, 
   .SDcols = (sapply(df, is.numeric))
  ]
    Class   A    D   M
 1:     A  NA   NA  NA
 2:     A  10  131   1
 3:     A  12 -128  32
 4:     A  11   55 258
 5:     B  NA   NA  NA
 6:     B  12  289  10
 7:     B   0   29 491
 8:     C  NA   NA  NA
 9:     C  20   50  20
10:     C -60  -25   0
11:     C  52  -47   1
12:     C  18    6 -65
13:     C -18   12   9