我有一系列事件及其发生时间。我可以使用 `hist` 绘制它们的直方图,但我不知道如何制作它们的累积图。
这是我开始使用的数据类型。(假设它已经是 `POSIXct` 格式)
例如,在我的直方图中,我有类似的东西:
> events$time
[1] 2015-10-05 16:58:41.986797 2015-10-05 16:59:23.389583
[3] 2015-10-05 16:59:44.99402 2015-10-05 16:59:53.225178
[5] 2015-10-05 16:59:59.594524 2015-10-05 17:00:05.555564
[7] 2015-10-05 17:00:44.173783 2015-10-05 17:00:46.289552
[9] 2015-10-05 17:00:56.772485 2015-10-05 17:01:18.937458
[11] 2015-10-05 17:02:04.661378
and so on for ~8000 values
我希望得到如下结果:
2015-10-05 4:00: 20 events
2015-10-05 4:15: 30 events
2015-10-05 4:30: 11 events
我该怎么做?
答案 0 :(得分:1)
可能的解决方案:
library(lubridate)

# example time data
time <- c(
  "2015-10-05 15:44:41.986797", "2015-10-05 15:59:23.389583", "2015-10-05 16:59:44.99402",
  "2015-10-05 16:59:44.99402", "2015-10-05 16:59:44.99402", "2015-10-05 16:59:44.99402",
  "2015-10-05 17:59:59.594524", "2015-10-05 17:59:59.594524", "2015-10-05 18:00:05.555564"
)

# transform time strings to POSIXct objects for count
time <- ymd_hms(time)

# Count how often each distinct timestamp occurs.
# The timestamps stay POSIXct the whole way: going through table() would turn
# them into factor labels (text) that then have to be parsed again, and whether
# the fractional seconds survive that round trip depends on the R version and
# on options("digits.secs").
stamps <- sort(unique(time))
event <- data.frame(
  time = stamps,
  # match() gives, for every event, the position of its timestamp in `stamps`;
  # tabulate() counts how many events landed on each position
  Freq = tabulate(
    match(as.numeric(time), as.numeric(stamps)),
    nbins = length(stamps)
  )
)
# find start and end time for 15min sequence
# Snap the earliest event down and the latest event up to the enclosing
# quarter-hour marks. This yields the same instants as rounding to the minute
# and then stepping one minute at a time until the minute is a multiple of 15.
start <- floor_date(min(event$time), "15 minutes")
end <- ceiling_date(max(event$time), "15 minutes")

# ceiling_date() leaves an instant that already sits on a boundary unchanged,
# so a lone event exactly on a quarter hour would give start == end. Widen the
# range to one slice so the 15min sequence always spans at least one interval.
if (end <= start) {
  end <- start + minutes(15)
}
# create sequence and result data.frame
ft.seq <- seq(start, end, "15 mins")
n.slices <- length(ft.seq) - 1

# attribute values to 15min time slices (vectorised, no nested loop)
# Each slice is (start, end]; the very first slice is also closed on the left.
# This is the convention hist() uses by default, and it makes sure an event
# that falls exactly on a slice boundary is counted once instead of being
# dropped by a strict "greater than start and less than end" test.
slice <- findInterval(
  as.numeric(event$time), as.numeric(ft.seq),
  rightmost.closed = TRUE, left.open = TRUE
)

# findInterval() returns 0 / length(ft.seq) for values outside the sequence;
# ignore those (and missing times) rather than indexing out of range
in.range <- !is.na(slice) & slice >= 1 & slice <= n.slices

slice.sum <- numeric(n.slices)
if (any(in.range)) {
  # total frequency per occupied slice; names(totals) are the slice indices
  totals <- tapply(event$Freq[in.range], slice[in.range], sum)
  slice.sum[as.integer(names(totals))] <- totals
}

# head()/tail() with a negative n drop the last/first element and stay correct
# when ft.seq has a single element, unlike 1:(length(ft.seq) - 1)
ft.event <- data.frame(
  start = head(ft.seq, -1),
  end = tail(ft.seq, -1),
  sum = slice.sum
)
# running total of events up to the end of each 15min slice
ft.event[["cumsum"]] <- cumsum(ft.event[["sum"]])

# example plot: cumulative count against the end time of each slice
library(ggplot2)
ggplot(ft.event, aes(x = end, y = cumsum)) +
  geom_line()
答案 1 :(得分:0)
这是一个旧帖子,但已有的答案太长了。
可以像提问者那样使用 `hist()`,然后直接对结果对象调用 `cumsum()`。
注意 `hist` 对象中的开始和结束时间。
library(tidyverse)
library(lubridate)

# sample event timestamps, given as text
time <- c(
  "2015-10-05 15:44:41.986797", "2015-10-05 15:59:23.389583", "2015-10-05 16:59:44.99402",
  "2015-10-05 16:59:44.99402", "2015-10-05 16:59:44.99402", "2015-10-05 16:59:44.99402",
  "2015-10-05 17:59:59.594524", "2015-10-05 17:59:59.594524", "2015-10-05 18:00:05.555564"
)

# parse the text into POSIXct date-times
time <- ymd_hms(time)

# quarter-hour marks that enclose the data: earliest event rounded down,
# latest event rounded up
start_time <- floor_date(min(time), "15 minutes")
end_time <- ceiling_date(max(time), "15 minutes")

# show both limits
start_time
end_time
# get breaks for histogram: one break every 15 minutes from start to end
breaks <- seq(start_time, end_time, by = "15 mins")

# Create histogram; plot = FALSE only computes the counts, nothing is drawn
event_hist <- hist(time, breaks, freq = TRUE, plot = FALSE)

# Organize results, calculate cumsum, all in a df
# event_hist$breaks is converted back to date-times once via as_datetime().
# Each bin runs from one break to the next, so the bin starts are all breaks
# but the last and the bin ends are all breaks but the first.
# head()/tail() with n = -1 state that directly; the earlier form
# breaks[1:length(breaks)-1] is parsed as (1:n) - 1 and only gave the right
# rows because the resulting index 0 is silently dropped.
edges <- as_datetime(event_hist$breaks, origin = "1970-01-01 00:00:00")
events_df <- data.frame(
  start = head(edges, -1),
  end = tail(edges, -1),
  count = event_hist$counts,
  cumsum = cumsum(event_hist$counts)
)
## Plot the cumulative count against the end time of each bin
library(ggplot2)
ggplot(events_df, aes(x = end, y = cumsum)) +
  geom_line()