Stack Overflow 上有大量关于计算行/条目/观测值之间时间差的内容。但是,我很难理解为什么 NA 会出现在不寻常的位置上。
我有 3 列:DATETIME 是 POSIXlt 类型,GRP800 是分组(因子),TIME800 应表示每组内相邻观测之间经过的时间。我使用的代码来自问题“Calculate differences between rows faster than a for loop?”:
# Compute, for each GRP800 group, the difference between consecutive DATETIME
# values, with NA for the first row of each group, and store it in df$TIME800.
# NOTE(review): by() yields one result per group in factor-level order and
# unlist() concatenates them in that order, so the values line up with the rows
# of df only if df is already sorted by GRP800 in level order -- a likely source
# of the misplaced NAs; ave() would keep row order. TODO confirm against full df.
# NOTE(review): diff() on date-times returns a difftime whose units are chosen
# automatically per call, so units may differ between groups.
df$TIME800<-unlist(by(df$DATETIME,df$GRP800,function(x)c(NA,diff(x))))
它对第一组似乎运行正常,但在第二组的中间却出现了 NA。我尝试了几种使用 diff 的方法,都得到相同的输出。我很困惑,任何建议都将不胜感激。
DATETIME GRP800 TIME800
1 2013-07-16 16:01:30 1 NA
2 2013-07-16 20:00:54 1 3.990000
3 2013-07-17 00:01:30 1 4.010000
4 2013-07-17 04:01:00 1 3.991667
5 2013-07-17 08:00:50 1 3.997222
6 2013-07-17 12:01:46 1 4.015556
7 2013-07-17 16:00:50 1 3.984444
8 2013-07-17 20:01:00 1 4.002778
9 2013-07-18 00:01:18 1 4.005000
10 2013-07-18 04:01:02 1 3.995556
11 2013-07-18 08:00:50 1 3.996667
12 2013-07-18 12:01:18 2 NA
13 2013-07-18 16:01:02 2 3.970833
14 2013-07-18 20:00:59 2 4.007500
15 2013-07-19 00:01:31 2 3.997222
16 2013-07-19 04:01:18 2 4.011111
17 2013-07-19 08:01:02 2 NA
18 2013-07-19 12:01:57 2 2.007500
19 2013-07-19 20:01:00 2 NA
20 2013-07-20 00:01:00 2 2.003333
> dput(df[1:20,])
structure(list(DATETIME = structure(list(sec = c(30, 54, 30,
0, 50, 46, 50, 0, 18, 2, 50, 18, 2, 59, 31, 18, 2, 57, 0, 0),
min = c(1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 1L), hour = c(16L, 20L, 0L, 4L, 8L,
12L, 16L, 20L, 0L, 4L, 8L, 12L, 16L, 20L, 0L, 4L, 8L, 12L,
20L, 0L), mday = c(16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L,
18L, 18L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 20L
), mon = c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), year = c(113L, 113L, 113L,
113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L, 113L,
113L, 113L, 113L, 113L, 113L, 113L, 113L), wday = c(2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 5L, 6L), yday = c(196L, 196L, 197L, 197L, 197L, 197L,
197L, 197L, 198L, 198L, 198L, 198L, 198L, 198L, 199L, 199L,
199L, 199L, 199L, 200L), isdst = c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
zone = c("MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT",
"MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT",
"MDT", "MDT", "MDT", "MDT"), gmtoff = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_)), .Names = c("sec",
"min", "hour", "mday", "mon", "year", "wday", "yday", "isdst",
"zone", "gmtoff"), class = c("POSIXlt", "POSIXt")), GRP800 = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), TIME800 = c(NA, 3.99, 4.01, 3.991666667, 3.997222222,
4.015555556, 3.984444444, 4.002777778, 4.005, 3.995555556, 3.996666667,
NA, 3.970833333, 4.0075, 3.997222222, 4.011111111, NA, 2.0075,
NA, 2.003333333)), .Names = c("DATETIME", "GRP800", "TIME800"
), row.names = c(NA, 20L), class = "data.frame")