如何创建滞后变量

时间:2015-08-09 13:55:03

标签: r lag

我想为变量 pm10 创建滞后变量,并使用了以下代码,但没有得到想要的结果。应该如何创建 pm10 的滞后变量?

# Attempt to build a lead (k = -1) and a lag (k = 1) of pm10 with lag().
# NOTE(review): lag() here appears to be stats::lag(). The dput() output below
# shows l1pm10 holding exactly the same values as pm10 plus a
# `.Tsp = c(2, 11, 1)` attribute, i.e. only the time index moved, not the values.
# `na.pad` is presumably borrowed from another package's lag method -- TODO
# confirm that it has no effect here.
df2$l1pm10 <- lag(df2$pm10, -1, na.pad = TRUE)
df2$l1pm102 <- lag(df2$pm10, 1)

dput(df2)
structure(list(var1 = 1:10, pm10 = c(26.956073733, NA, 32.838694951, 
39.9560737332, NA, 40.9560737332, 33.956073733, 28.956073733, 
32.348770798, NA), l1pm10 = structure(c(26.956073733, NA, 32.838694951, 
39.9560737332, NA, 40.9560737332, 33.956073733, 28.956073733, 
32.348770798, NA), .Tsp = c(2, 11, 1))), .Names = c("var1", "pm10", 
"l1pm10"), row.names = c("1", "2", "3", "4", "5", "6", "7", "8", 
"9", "10"), class = "data.frame")

5 个答案:

答案 0 :(得分:6)

在基础 R 中,函数 lag() 对时间序列对象很有用。但这里是一个数据框,情况有所不同。

您可以尝试以下内容,我承认这不是很优雅:

df2$l1pm10 <- sapply(1:nrow(df2), function(x) df2$pm10[x+1])
df2$l1pm102 <- sapply(1:nrow(df2), function(x) df2$pm10[x-1])
#> df2
#   var1     pm10   l1pm10  l1pm102
#1     1 26.95607       NA
#2     2       NA 32.83869 26.95607
#3     3 32.83869 39.95607       NA
#4     4 39.95607       NA 32.83869
#5     5       NA 40.95607 39.95607
#6     6 40.95607 33.95607       NA
#7     7 33.95607 28.95607 40.95607
#8     8 28.95607 32.34877 33.95607
#9     9 32.34877       NA 28.95607
#10   10       NA       NA 32.34877

另一种方法是使用 Hmisc 包中的 Lag() 函数(使用大写 "L"):

library(Hmisc)
df2$l1pm10 <- Lag(df2$pm10, -1)
df2$l1pm102 <- Lag(df2$pm10, +1)

答案 1 :(得分:6)

另一种方法是使用 data.table 包中的 shift() 函数:

library(data.table)
# Convert df2 to a data.table by reference so columns can be added with `:=`.
setDT(df2)
# l1pm10 takes the previous row's pm10 (lag) ...
df2[, l1pm10 := shift(pm10, n = 1L, fill = NA, type = "lag")]
# ... and l1pm102 takes the next row's pm10 (lead).
df2[, l1pm102 := shift(pm10, n = 1L, fill = NA, type = "lead")]

这给出了:

> df2
    var1     pm10   l1pm10  l1pm102
 1:    1 26.95607       NA       NA
 2:    2       NA 26.95607 32.83869
 3:    3 32.83869       NA 39.95607
 4:    4 39.95607 32.83869       NA
 5:    5       NA 39.95607 40.95607
 6:    6 40.95607       NA 33.95607
 7:    7 33.95607 40.95607 28.95607
 8:    8 28.95607 33.95607 32.34877
 9:    9 32.34877 28.95607       NA
10:   10       NA 32.34877       NA

使用过的数据:

# Example data: ten observations of pm10 with missing values in rows 2, 5
# and 10, indexed by var1.
df2 <- data.frame(
  var1 = 1:10,
  pm10 = c(
    26.956073733, NA, 32.838694951, 39.9560737332, NA,
    40.9560737332, 33.956073733, 28.956073733, 32.348770798, NA
  ),
  # Store the row names as the character strings "1" to "10".
  row.names = as.character(1:10)
)

答案 2 :(得分:4)

我知道这个问题已经有被采纳的答案,但几个月前我遇到了同样的问题(见另一个问题),当时想自己写一个 lag 函数。代码如下:

 # Home-made lag: `seq_along(x) - 1` yields 0, 1, ..., n - 1. R drops the zero
 # index, so the subset is the first n - 1 values of pm10, and the leading NA
 # brings the result back to the original length.
 df2$lagpm10 <- c(NA, df2$pm10[seq_along(df2$pm10) -1])

 df2
   var1     pm10   l1pm10  lagpm10
1     1 26.95607 26.95607       NA
2     2       NA       NA 26.95607
3     3 32.83869 32.83869       NA
4     4 39.95607 39.95607 32.83869
5     5       NA       NA 39.95607
6     6 40.95607 40.95607       NA
7     7 33.95607 33.95607 40.95607
8     8 28.95607 28.95607 33.95607
9     9 32.34877 32.34877 28.95607
10   10       NA       NA 32.34877

基准

其中 Rhertel1 和 Rhertel2 是 Rhertel 的两行代码,Sabdem 是我的代码。

Unit: microseconds
     expr     min      lq      mean   median       uq       max neval
 Rhertel1 250.523 257.740 272.07275 260.3355 264.0945  3540.187 10000
 Rhertel2 246.641 253.887 271.77003 256.5380 260.4935 14637.791 10000
   Sabdem  57.762  60.521  65.85315  61.3765  62.6050 12275.979 10000

答案 3 :(得分:0)

我想为初学者提供一个简单的解决方案:先创建向量或列的“滞后”版本(在第一个位置添加 NA),然后将各列绑定在一起:

# Example vector.
x <- 1:10

# Lagged copy of x: prepend NA, then keep only the first length(x) elements so
# the result always has the same length as x. An index built as
# 1:(length(x) - 1) would count down (1:0) for a one-element vector and select
# x[1] by mistake; seq_along() has no such problem.
x_lagged <- c(NA, x)[seq_along(x)]

# Bind the original and the lagged vector side by side as matrix columns.
new_x <- cbind(x, x_lagged)

答案 4 :(得分:0)

您可以按如下方式使用 head 和 tail:

# l1pm10: drop the first value and append NA, so each row holds the next row's
# pm10 (lead).
df2$l1pm10 <- c(tail(df2$pm10, -1), NA)
# l1pm102: prepend NA and drop the last value, so each row holds the previous
# row's pm10 (lag).
df2$l1pm102 <- c(NA, head(df2$pm10, -1))
df2
#R>    var1     pm10   l1pm10  l1pm102
#R> 1     1 26.95607       NA       NA
#R> 2     2       NA 32.83869 26.95607
#R> 3     3 32.83869 39.95607       NA
#R> 4     4 39.95607       NA 32.83869
#R> 5     5       NA 40.95607 39.95607
#R> 6     6 40.95607 33.95607       NA
#R> 7     7 33.95607 28.95607 40.95607
#R> 8     8 28.95607 32.34877 33.95607
#R> 9     9 32.34877       NA 28.95607
#R> 10   10       NA       NA 32.34877

# Or with transform(): builds the same lead (l1pm10) and lag (l1pm102) columns
# in a single call. The result is only printed here, not assigned back to df2.
transform(df2, l1pm10 = c(tail(pm10, -1), NA), l1pm102 = c(NA, head(pm10, -1)))
#R>    var1     pm10   l1pm10  l1pm102
#R> 1     1 26.95607       NA       NA
#R> 2     2       NA 32.83869 26.95607
#R> 3     3 32.83869 39.95607       NA
#R> 4     4 39.95607       NA 32.83869
#R> 5     5       NA 40.95607 39.95607
#R> 6     6 40.95607 33.95607       NA
#R> 7     7 33.95607 28.95607 40.95607
#R> 8     8 28.95607 32.34877 33.95607
#R> 9     9 32.34877       NA 28.95607
#R> 10   10       NA       NA 32.34877

你可以用这两个函数写一个通用的滞后函数,如下:

# Shift a vector by `k` positions, filling the vacated positions with `pad`.
#
# Args:
#   x:   Vector to shift.
#   k:   A single whole number. k > 0 moves values towards the end (lag),
#        k < 0 moves them towards the start (lead), k == 0 returns `x` as is.
#   pad: A single value used to fill the vacated positions (default NA).
#
# Returns:
#   A vector with the same length as `x`. When abs(k) >= length(x) every
#   element is `pad`.
#
# Raises:
#   An error if `k` is not a single finite whole number or `pad` is not a
#   single value; without this check such input could yield a result whose
#   length differs from length(x).
lag_func <- function(x, k = 1, pad = NA) {
  stopifnot(
    is.numeric(k), length(k) == 1L, is.finite(k), k == trunc(k),
    length(pad) == 1L
  )
  if (k == 0) {
    return(x)
  }
  # Never pad with more elements than `x` holds, so the length is preserved
  # even when abs(k) exceeds length(x).
  nas <- rep(pad, min(length(x), abs(k)))
  if (k < 0) {
    # tail() with a negative n drops the first abs(k) elements.
    c(tail(x, k), nas)
  } else {
    # head() with a negative n drops the last k elements.
    c(nas, head(x, -k))
  }
}

# Apply lag_func() to pm10 for every shift k from -11 to 11. sapply() binds the
# results into a matrix with one column per k (23 columns in the output below).
sapply((-11):11, lag_func, x = df2$pm10)
#R>       [,1] [,2] [,3]     [,4]     [,5]     [,6]     [,7]     [,8]     [,9]
#R>  [1,]   NA   NA   NA 32.34877 28.95607 33.95607 40.95607       NA 39.95607
#R>  [2,]   NA   NA   NA       NA 32.34877 28.95607 33.95607 40.95607       NA
#R>  [3,]   NA   NA   NA       NA       NA 32.34877 28.95607 33.95607 40.95607
#R>  [4,]   NA   NA   NA       NA       NA       NA 32.34877 28.95607 33.95607
#R>  [5,]   NA   NA   NA       NA       NA       NA       NA 32.34877 28.95607
#R>  [6,]   NA   NA   NA       NA       NA       NA       NA       NA 32.34877
#R>  [7,]   NA   NA   NA       NA       NA       NA       NA       NA       NA
#R>  [8,]   NA   NA   NA       NA       NA       NA       NA       NA       NA
#R>  [9,]   NA   NA   NA       NA       NA       NA       NA       NA       NA
#R> [10,]   NA   NA   NA       NA       NA       NA       NA       NA       NA
#R>          [,10]    [,11]    [,12]    [,13]    [,14]    [,15]    [,16]    [,17]
#R>  [1,] 32.83869       NA 26.95607       NA       NA       NA       NA       NA
#R>  [2,] 39.95607 32.83869       NA 26.95607       NA       NA       NA       NA
#R>  [3,]       NA 39.95607 32.83869       NA 26.95607       NA       NA       NA
#R>  [4,] 40.95607       NA 39.95607 32.83869       NA 26.95607       NA       NA
#R>  [5,] 33.95607 40.95607       NA 39.95607 32.83869       NA 26.95607       NA
#R>  [6,] 28.95607 33.95607 40.95607       NA 39.95607 32.83869       NA 26.95607
#R>  [7,] 32.34877 28.95607 33.95607 40.95607       NA 39.95607 32.83869       NA
#R>  [8,]       NA 32.34877 28.95607 33.95607 40.95607       NA 39.95607 32.83869
#R>  [9,]       NA       NA 32.34877 28.95607 33.95607 40.95607       NA 39.95607
#R> [10,]       NA       NA       NA 32.34877 28.95607 33.95607 40.95607       NA
#R>          [,18]    [,19]    [,20]    [,21] [,22] [,23]
#R>  [1,]       NA       NA       NA       NA    NA    NA
#R>  [2,]       NA       NA       NA       NA    NA    NA
#R>  [3,]       NA       NA       NA       NA    NA    NA
#R>  [4,]       NA       NA       NA       NA    NA    NA
#R>  [5,]       NA       NA       NA       NA    NA    NA
#R>  [6,]       NA       NA       NA       NA    NA    NA
#R>  [7,] 26.95607       NA       NA       NA    NA    NA
#R>  [8,]       NA 26.95607       NA       NA    NA    NA
#R>  [9,] 32.83869       NA 26.95607       NA    NA    NA
#R> [10,] 39.95607 32.83869       NA 26.95607    NA    NA