我有一个时间列表:
> sapply(copy, class)
$timefact
[1] "POSIXct" "POSIXt"
$timefact_hour
[1] "integer"
> head(copy)
timefact timefact_hour
1 2016-04-07 23:42:00 23
2 2016-04-07 23:37:00 23
3 2016-04-07 23:31:00 23
4 2016-04-07 23:27:00 23
5 2016-04-07 23:19:00 23
6 2016-04-07 23:17:00 23
我的目标是创建第三列,它将是一个整数值,表示给定行与其上方行之间的分钟数差异。
因此输出应该如下所示:
> output
timefact timefact_hour timediff
1 2016-04-07 23:42:00 23 NA
2 2016-04-07 23:37:00 23 5
3 2016-04-07 23:31:00 23 6
4 2016-04-07 23:27:00 23 4
5 2016-04-07 23:19:00 23 8
6 2016-04-07 23:17:00 23 2
但是,我想排除所有小时数不同的情况,并且只查找与上一行共享相同小时的行的差异。例如:
> output
timefact timefact_hour timediff
90 2016-04-07 12:14:00 12 6
91 2016-04-07 12:04:00 12 10
92 2016-04-07 11:56:00 11 NA
93 2016-04-07 11:49:00 11 7
94 2016-04-07 11:42:00 11 7
95 2016-04-07 11:36:00 11 6
到目前为止,我已经提出了以下代码,
# Print the gap, in minutes, between each row's timestamp and the row above it.
# difftime(a, b) computes a - b, so with rows sorted newest-first (as in the
# sample) the printed values are negative.
# seq_len(nrow(copy))[-1] yields 2..n and is empty when there are fewer than
# two rows, whereas 2:nrow(copy) would count downwards (2, 1 or 2, 1, 0).
for (i in seq_len(nrow(copy))[-1]) {
  print(difftime(copy[i, "timefact"], copy[i - 1, "timefact"],
                 tz = "EST", units = "mins"))
}
这似乎有效,但我不知道如何a)将它作为第三列添加到数据框中,b)跳过上面的行不是同一小时的行。任何意见,将不胜感激!我在下面列出了一些数据样本:
> dput(copy)
structure(list(timefact = structure(list(sec = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), min = c(42L, 37L, 31L, 27L,
19L, 17L, 10L, 6L, 1L, 56L, 50L, 45L, 34L, 27L, 18L, 4L, 58L,
53L, 50L, 44L, 44L, 37L, 34L, 28L, 23L, 16L, 12L, 4L, 59L, 50L,
19L, 13L, 46L, 26L, 26L, 19L, 11L, 8L, 2L, 55L, 51L, 44L, 37L,
31L, 9L, 0L, 48L, 43L, 34L, 30L, 10L, 6L, 57L, 52L, 44L, 39L,
30L, 23L, 23L, 1L, 1L, 54L, 48L, 32L, 23L, 16L, 12L, 5L, 1L,
48L, 44L, 37L, 27L, 18L, 13L, 6L, 0L, 39L, 31L, 23L, 17L, 4L,
54L, 49L, 44L, 38L, 33L, 24L, 20L, 14L, 4L, 56L, 49L, 42L, 36L,
48L, 36L, 31L, 22L, 14L), hour = c(23L, 23L, 23L, 23L, 23L, 23L,
23L, 23L, 23L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 21L, 21L, 21L,
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 20L, 20L, 20L, 20L,
19L, 19L, 19L, 19L, 19L, 19L, 19L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 17L, 17L, 17L, 17L, 17L, 17L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 13L, 13L, 13L, 13L, 13L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 11L, 11L, 11L, 11L, 10L, 10L,
10L, 10L, 10L), mday = c(7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), mon = c(3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L), year = c(116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L,
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L,
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L,
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L,
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L,
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L,
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L,
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L,
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L,
116L, 116L, 116L, 116L), wday = c(4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), yday = c(97L,
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L,
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L,
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L,
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L,
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L,
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L,
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L,
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L), isdst = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), zone = c("EST", "EST", "EST", "EST", "EST", "EST", "EST",
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST",
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST",
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST",
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST",
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST",
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST",
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST",
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST",
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST",
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST",
"EST", "EST", "EST"), gmtoff = c(NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_)), .Names = c("sec", "min", "hour",
"mday", "mon", "year", "wday", "yday", "isdst", "zone", "gmtoff"
), class = c("POSIXlt", "POSIXt"), tzone = c("EST", "EST", " "
)), timefact_hour = c(23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L,
23L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 21L, 21L, 21L, 21L, 21L,
21L, 21L, 21L, 21L, 21L, 21L, 21L, 20L, 20L, 20L, 20L, 19L, 19L,
19L, 19L, 19L, 19L, 19L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 17L,
17L, 17L, 17L, 17L, 17L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 13L, 13L, 13L, 13L, 13L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 11L, 11L, 11L, 11L, 10L, 10L, 10L, 10L,
10L)), .Names = c("timefact", "timefact_hour"), row.names = c(NA,
100L), class = "data.frame")
答案 0 :(得分:1)
首先将 timefact 列转换为 POSIXct 类:
# The dput() sample stores timefact as POSIXlt; replace it with POSIXct, the
# recommended date-time class for data-frame columns.
copy[["timefact"]] <- as.POSIXct(copy[["timefact"]])
然后,使用 lubridate 和 dplyr:
# Minutes between each row and the row above it, computed separately within
# each hour so that the first row of every hour gets NA.
library(lubridate)  # minute()
library(dplyr)      # %>%, group_by(), mutate(), lag(), ungroup()
copy %>%
  group_by(timefact_hour) %>%
  # lag(m) - m is "previous row's minute minus this row's minute"; it is
  # equivalent to lag(m - lead(m)) and NA on the first row of each group.
  mutate(timediff = lag(minute(timefact)) - minute(timefact)) %>%
  # Drop the grouping so later operations are not silently done per hour.
  ungroup()
或者使用 data.table:
library(data.table)
# Convert copy to a data.table by reference, then add timediff in place.
setDT(copy)
copy[, timediff := {
  # Minute-of-hour for every row in the current hour group.
  mins <- minute(timefact)
  # Previous row's minute minus this row's minute; NA on the group's first row.
  shift(mins, type = "lag") - mins
}, by = timefact_hour]