我有一个时间步长不规则的数据框,我想将其转换为等间隔的时间步长。每10分钟应写入一个值,如果该10分钟区间内没有新值,则应沿用前一个值(请参阅2019-01-01 01:00:00和2019-01-01 02:30:00)。
date ZUL_T
1 2019-01-01 00:04:00 23.3
2 2019-01-01 00:15:00 23.3
3 2019-01-01 00:26:00 19.9
4 2019-01-01 00:37:00 20.7
5 2019-01-01 00:48:00 21.9
6 2019-01-01 00:59:00 21.9
7 2019-01-01 01:10:00 18.8
8 2019-01-01 01:22:00 18.8
9 2019-01-01 01:33:00 20.7
10 2019-01-01 01:44:00 21.6
11 2019-01-01 01:55:00 19.2
12 2019-01-01 02:06:00 19.2
13 2019-01-01 02:17:00 19.6
14 2019-01-01 02:29:00 19.6
15 2019-01-01 02:40:00 20.5
这是我当前的代码,但是如果DS在某个10分钟区间内没有值,结果中就会缺少相应的时间步。
library(lubridate)
# Build a regular 10-minute grid spanning the observations in DS and average
# ZUL_T within each grid interval.
# NOTE(review): intervals that contain no observation produce no row in the
# aggregate() result, so the output can skip time steps.
lowtime <- min(DS$date)
hightime <- max(DS$date)
# Set the minute and second to the nearest 10 minute value
# (lower bound rounded down, upper bound rounded up)
minute(lowtime) <- floor(minute(lowtime)/10) * 10
minute(hightime) <- ceiling(minute(hightime)/10) * 10
second(lowtime) <- 0
second(hightime) <- 0
# Set the breakpoints at 10 minute intervals
# (by = 600 is the step in seconds)
breakpoints <- seq.POSIXt(lowtime, hightime, by = 600)
# Mean of ZUL_T per interval; cut() assigns each timestamp to its interval
ZUL_T <- aggregate(ZUL_T ~ cut(date, breaks = breakpoints), DS, mean)
> data
date ZUL_T
1 2019-01-01 00:00:00 23.3
2 2019-01-01 00:10:00 23.3
3 2019-01-01 00:20:00 19.9
4 2019-01-01 00:30:00 20.7
5 2019-01-01 00:40:00 21.9
6 2019-01-01 00:50:00 21.9
7 2019-01-01 01:10:00 18.8
8 2019-01-01 01:20:00 18.8
9 2019-01-01 01:30:00 20.7
10 2019-01-01 01:40:00 21.6
11 2019-01-01 01:50:00 19.2
12 2019-01-01 02:00:00 19.2
13 2019-01-01 02:10:00 19.6
14 2019-01-01 02:20:00 19.6
15 2019-01-01 02:40:00 20.5
答案 0 :(得分:0)
您可以把breakpoints转换为数据框,然后与聚合结果进行merge。
# first, you probably need 10 min later in time
# Move the upper bound one further 10-minute step past the last observation,
# then rebuild the grid of interval boundaries.
minute(hightime) <- 10 * ceiling((minute(max(DS$date)) + 10) / 10)
breakpoints <- seq(from = lowtime, to = hightime, by = "10 min")
使用经典的列表写法调用aggregate,以便得到正确的列名。
# Mean of ZUL_T per 10-minute interval. The named lists give the result
# columns the names `date` and `ZUL_T`.
ZUL_T <- aggregate(
  x = list(ZUL_T = DS$ZUL_T),
  by = list(date = cut(DS$date, breaks = breakpoints)),
  FUN = mean
)
现在进行merge,
# Outer-join the aggregated values with the full grid of interval start
# times so that intervals without observations appear as rows with NA.
# Both sides carry `date` as character so the join keys compare as strings.
# The last breakpoint is dropped because it only closes the final interval.
ZUL_T <- merge(
  transform(ZUL_T, date = as.character(date)),
  data.frame(
    date = as.character(breakpoints[-length(breakpoints)]),
    stringsAsFactors = FALSE
  ),
  by = "date",
  all = TRUE
)
并用前一行的值替换NA值。
# Carry the last observed value forward into the empty intervals.
# Indexing by the running count of observed values also fills runs of
# several consecutive NAs; a leading NA (nothing earlier to copy) stays NA.
ZUL_T$ZUL_T <- local({
  values <- ZUL_T$ZUL_T
  observed <- !is.na(values)
  c(NA, values[observed])[cumsum(observed) + 1]
})
ZUL_T
# date ZUL_T
# 1 2019-01-01 00:00:00 23.3
# 2 2019-01-01 00:10:00 23.3
# 3 2019-01-01 00:20:00 19.9
# 4 2019-01-01 00:30:00 20.7
# 5 2019-01-01 00:40:00 21.9
# 6 2019-01-01 00:50:00 21.9
# 7 2019-01-01 01:00:00 21.9
# 8 2019-01-01 01:10:00 18.8
# 9 2019-01-01 01:20:00 18.8
# 10 2019-01-01 01:30:00 20.7
# 11 2019-01-01 01:40:00 21.6
# 12 2019-01-01 01:50:00 19.2
# 13 2019-01-01 02:00:00 19.2
# 14 2019-01-01 02:10:00 19.6
# 15 2019-01-01 02:20:00 19.6
# 16 2019-01-01 02:30:00 19.6
# 17 2019-01-01 02:40:00 20.5
答案 1 :(得分:0)
我们可以使用lubridate包中的floor_date把时间向下取整到每10分钟,然后按取整后的时间分组,并对ZUL_T值求和(sum)。
library(dplyr)
library(lubridate)
library(tidyr)
# Floor every timestamp to the start of its 10-minute bin, then sum ZUL_T
# within each bin.
df %>%
  mutate(date = floor_date(ymd_hms(date), unit = "10 mins")) %>%
  group_by(date) %>%
  summarise(ZUL_T = sum(ZUL_T))
# date ZUL_T
# <dttm> <dbl>
# 1 2019-01-01 00:00:00 23.3
# 2 2019-01-01 00:10:00 23.3
# 3 2019-01-01 00:20:00 19.9
# 4 2019-01-01 00:30:00 20.7
# 5 2019-01-01 00:40:00 21.9
# 6 2019-01-01 00:50:00 21.9
# 7 2019-01-01 01:10:00 18.8
# 8 2019-01-01 01:20:00 18.8
# 9 2019-01-01 01:30:00 20.7
#10 2019-01-01 01:40:00 21.6
#11 2019-01-01 01:50:00 19.2
#12 2019-01-01 02:00:00 19.2
#13 2019-01-01 02:10:00 19.6
#14 2019-01-01 02:20:00 19.6
#15 2019-01-01 02:40:00 20.5
然后使用complete和fill补全缺失的时间点,并用先前的值填充NA值。
# Floor every timestamp to the start of its 10-minute bin and sum ZUL_T per
# bin, then add a row for every 10-minute step between the first and last
# bin and carry the previous value down into the newly created NA rows.
df %>%
  group_by(date = floor_date(ymd_hms(date), "10 mins")) %>%
  summarise(ZUL_T = sum(ZUL_T)) %>%
  complete(date = seq(min(date), max(date), "10 mins")) %>%
  fill(ZUL_T)
# date ZUL_T
# <dttm> <dbl>
# 1 2019-01-01 00:00:00 23.3
# 2 2019-01-01 00:10:00 23.3
# 3 2019-01-01 00:20:00 19.9
# 4 2019-01-01 00:30:00 20.7
# 5 2019-01-01 00:40:00 21.9
# 6 2019-01-01 00:50:00 21.9
# 7 2019-01-01 01:00:00 21.9
# 8 2019-01-01 01:10:00 18.8
# 9 2019-01-01 01:20:00 18.8
#10 2019-01-01 01:30:00 20.7
#11 2019-01-01 01:40:00 21.6
#12 2019-01-01 01:50:00 19.2
#13 2019-01-01 02:00:00 19.2
#14 2019-01-01 02:10:00 19.6
#15 2019-01-01 02:20:00 19.6
#16 2019-01-01 02:30:00 19.6
#17 2019-01-01 02:40:00 20.5
数据
# Sample data: 15 irregularly spaced timestamps (stored as a factor) and the
# ZUL_T value recorded at each of them.
df <- local({
  stamps <- c(
    "2019-01-01 00:04:00", "2019-01-01 00:15:00", "2019-01-01 00:26:00",
    "2019-01-01 00:37:00", "2019-01-01 00:48:00", "2019-01-01 00:59:00",
    "2019-01-01 01:10:00", "2019-01-01 01:22:00", "2019-01-01 01:33:00",
    "2019-01-01 01:44:00", "2019-01-01 01:55:00", "2019-01-01 02:06:00",
    "2019-01-01 02:17:00", "2019-01-01 02:29:00", "2019-01-01 02:40:00"
  )
  readings <- c(
    23.3, 23.3, 19.9, 20.7, 21.9, 21.9, 18.8, 18.8,
    20.7, 21.6, 19.2, 19.2, 19.6, 19.6, 20.5
  )
  # Levels are given explicitly so their order does not depend on sorting.
  data.frame(date = factor(stamps, levels = stamps), ZUL_T = readings)
})