鉴于在指定观察期内发生的一组事件(时间间隔),我试图找到没有事件发生的时间间隔。我们可以假设事件之间没有重叠。是否有比下面的方法更有效/更智能的方法?
测试df :
events <- data.frame(eventStartTime = c("2019-01-20 18:03:00", "2019-01-20 18:10:00", "2019-01-20 18:50:00"),
eventEndTime = c("2019-01-20 18:05:00", "2019-01-20 18:20:00", "2019-01-20 18:55:00"))
events <- as.data.frame(lapply(events[,c('eventStartTime', 'eventEndTime')], as.POSIXct, format = "%Y-%m-%d %H:%M:%S", tz = "CET"))
预期产量:
complementEvents <- data.frame(complementStartTime = c("2019-01-20 18:00:00", "2019-01-20 18:05:00", "2019-01-20 18:20:00", "2019-01-20 18:55:00"),
complementEndTime = c("2019-01-20 18:03:00", "2019-01-20 18:10:00", "2019-01-20 18:50:00", "2019-01-20 19:00:00"))
complementEvents <- as.data.frame(lapply(complementEvents[,c('complementStartTime', 'complementEndTime')], as.POSIXct, format = "%Y-%m-%d %H:%M:%S", tz = "CET"))
我想要实现的视觉效果
library(ggplot2)
options(stringsAsFactors = FALSE)
events$type <- rep("event", nrow(events))
complementEvents$type <- rep("complement event", nrow(complementEvents))
names(complementEvents) <- names(events)
observationStartTime <- as.POSIXct("2019-01-20 18:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "CET")
observationEndTime <- as.POSIXct("2019-01-20 19:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "CET")
ggplot(data = rbind(events, complementEvents)) +
geom_rect(mapping=aes(xmin=eventStartTime, xmax=eventEndTime, ymin=0,
ymax=0.5, fill = type), alpha = 0.4)+
scale_y_continuous(limits = c(0,0.5))+
scale_x_datetime(date_breaks = "10 min",
date_labels = "%H:%M",
limits = c(observationStartTime, observationEndTime))+
scale_fill_manual(values=c("event"="#1a75ce", "complement event"="#fdbb2f"))+
theme_minimal()+
theme(panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
axis.title = element_blank(),
axis.text.y = element_blank(),
text = element_text(size = 12),
legend.position = "top")
我编写了以下函数:
findComplementIntervals <- function(data, obsStartTime, obsEndTime){
# find time intervals of complement events given an observation time interval
complementEvents <- data.frame()
temp <- data.frame(complementStartTime = data$eventEndTime, complementEndTime = lead(data$eventStartTime))
if (data$eventStartTime[1] == obsStartTime & data$eventEndTime[nrow(data)] == obsEndTime){
complementEvents <- temp[-nrow(temp),]
}else if (data$eventStartTime[1] == obsStartTime & data$eventEndTime[nrow(data)] < obsEndTime){
temp$complementEndTime[nrow(temp)] <- obsEndTime
complementEvents <- temp
}else if (data$eventStartTime[1] > obsStartTime & data$eventEndTime[nrow(data)] == obsEndTime){
complementEvents <- temp[-nrow(temp),]
complementEvents[nrow(complementEvents) + 1,] <- rep(NA,2)
complementEvents$complementStartTime[nrow(complementEvents)] <- obsStartTime
complementEvents$complementEndTime[nrow(complementEvents)] <- data$eventStartTime[1]
}else{
temp$complementEndTime[nrow(temp)] <- obsEndTime
complementEvents <- temp
complementEvents[nrow(complementEvents) + 1,] <- rep(NA,2)
complementEvents$complementStartTime[nrow(complementEvents)] <- obsStartTime
complementEvents$complementEndTime[nrow(complementEvents)] <- data$eventStartTime[1]
}
complementEvents <- complementEvents[order(complementEvents$complementStartTime),]
return(complementEvents)
}
还有另一种方法来查找补充事件/时间间隔更有效/更优雅吗?
答案 0 :(得分:0)
一种data.table
方法
#set window to analyse
startTime = as.POSIXct( "2019-01-20 18:00:00",
format = "%Y-%m-%d %H:%M:%S", tz = "CET")
endTime = as.POSIXct( "2019-01-20 19:00:00",
format = "%Y-%m-%d %H:%M:%S", tz = "CET")
library( data.table )
#create data.table with all minutes bewteen start - end
dt.mins <- data.table( minute = seq(startTime, endTime - 60, by = "1 mins"),
minute2 = seq(startTime +60 , endTime, by = "1 mins") )
#perform by-reference non-equi join
dt.mins[ setDT( events ), event := 1, on = c( "minute >= eventStartTime",
"minute < eventEndTime" ) ]
#set eventumber = 0 for minutes that fall outside events
dt.mins[ is.na( event ), event := 0 ]
#create group-numbers to summarise on later, using rleid()
dt.mins[, group := rleid( event ) ]
#summarise by group, on all rows (=minutes) that fall outside events
dt.mins[ event == 0, ][, list( complementStartTime = min( minute ),
complementEndTime = max( minute2 ) ),
by = .(group)][, group := NULL][]
输出
# complementStartTime complementEndTime
# 1: 2019-01-20 18:00:00 2019-01-20 18:03:00
# 2: 2019-01-20 18:05:00 2019-01-20 18:10:00
# 3: 2019-01-20 18:20:00 2019-01-20 18:50:00
# 4: 2019-01-20 18:55:00 2019-01-20 19:00:00