在R中制作事件的累积图

时间:2016-11-04 21:01:57

标签: r

我有一系列事件及其时间。我可以使用 hist() 绘制它们的直方图,但我不知道如何制作它们的累积图。

这是我开始使用的数据类型。 (假设它已经是POSIXct格式)

POSIXct

例如,在我的直方图中,我有类似的东西:

> events$time

 [1] 2015-10-05 16:58:41.986797 2015-10-05 16:59:23.389583
 [3] 2015-10-05 16:59:44.99402  2015-10-05 16:59:53.225178
 [5] 2015-10-05 16:59:59.594524 2015-10-05 17:00:05.555564
 [7] 2015-10-05 17:00:44.173783 2015-10-05 17:00:46.289552
 [9] 2015-10-05 17:00:56.772485 2015-10-05 17:01:18.937458
[11] 2015-10-05 17:02:04.661378

and so on for ~8000 values

我希望得到类似下面这样的结果:

2015-10-05 4:00: 20 events
2015-10-05 4:15: 30 events
2015-10-05 4:30: 11 events

我该怎么做?

2 个答案:

答案 0 :(得分:1)

可能的解决方案:

library(lubridate)

# example time data
time <- c(
  "2015-10-05 15:44:41.986797", "2015-10-05 15:59:23.389583", "2015-10-05 16:59:44.99402",
  "2015-10-05 16:59:44.99402", "2015-10-05 16:59:44.99402", "2015-10-05 16:59:44.99402",
  "2015-10-05 17:59:59.594524", "2015-10-05 17:59:59.594524", "2015-10-05 18:00:05.555564"
)

# transform time strings to POSIXct objects; the events are binned directly
# from these values, so no precision is lost in a character round trip
time <- ymd_hms(time)

# Count how many values of `x` fall into each half-open interval
# [breaks[i], breaks[i + 1]).
#
# x:      event times (POSIXct or numeric)
# breaks: interval boundaries, sorted in increasing order
# Returns an integer vector of length(breaks) - 1; values outside the
# range covered by `breaks` (and NA values) are not counted.
count_in_bins <- function(x, breaks) {
  bin <- findInterval(as.numeric(x), as.numeric(breaks))
  tabulate(bin, nbins = length(breaks) - 1L)
}

# find start and end time for 15min sequence: snap the first event down and
# the last event up to the 15-minute grid. change_on_boundary = TRUE moves an
# event lying exactly on a grid line up to the next one, so the last
# half-open slice always contains it and start < end always holds.
start <- floor_date(min(time, na.rm = TRUE), "15 minutes")
end <- ceiling_date(
  max(time, na.rm = TRUE), "15 minutes",
  change_on_boundary = TRUE
)

# create sequence and result data.frame
ft.seq <- seq(start, end, by = "15 mins")

# one row per 15min time slice [start, end); an event exactly on a boundary
# belongs to the slice that starts there
ft.event <- data.frame(
  start = ft.seq[-length(ft.seq)],
  end = ft.seq[-1L],
  sum = count_in_bins(time, ft.seq)
)

# cumsum
ft.event$cumsum <- cumsum(ft.event$sum)

# example plot: cumulative number of events, drawn at the end of each slice
library(ggplot2)

ggplot(ft.event, aes(x = end, y = cumsum)) +
  geom_line()

答案 1 :(得分:0)

这是旧文章,但是给出的答案很长。

使用hist()(就像OP一样),然后仅在结果对象上使用cumsum()

注意hist对象中的开始和结束时间

library(tidyverse)
library(lubridate)

# example time data
time <- c(
  "2015-10-05 15:44:41.986797", "2015-10-05 15:59:23.389583", "2015-10-05 16:59:44.99402",
  "2015-10-05 16:59:44.99402", "2015-10-05 16:59:44.99402", "2015-10-05 16:59:44.99402",
  "2015-10-05 17:59:59.594524", "2015-10-05 17:59:59.594524", "2015-10-05 18:00:05.555564"
)

# transform time strings to POSIXct objects for count
time <- ymd_hms(time)

# Get start and end times, snapped outwards to the 15-minute grid
start_time <- min(time) %>% floor_date("15 minutes")
end_time <- max(time) %>% ceiling_date("15 minutes")

# If every event sits on the same grid line both bounds coincide; widen the
# range so that hist() still receives at least one bin
if (end_time <= start_time) {
  end_time <- start_time + minutes(15)
}

start_time
end_time

# get breaks for histogram
breaks <- seq(start_time, end_time, by = "15 mins")

# Create histogram (counts only, nothing is drawn). With hist()'s default
# right = TRUE the bins are right-closed: an event exactly on a break is
# counted in the bin that ends there.
event_hist <- hist(time, breaks, freq = TRUE, plot = FALSE)

# Organize results, calculate cumsum, all in a df. The POSIXct `breaks`
# vector is used for the bin edges directly, so class and time zone are
# kept without converting event_hist$breaks back from numeric.
n_breaks <- length(breaks)
events_df <- data.frame(
  start = breaks[-n_breaks],
  end = breaks[-1L],
  count = event_hist$counts,
  cumsum = cumsum(event_hist$counts)
)

## Now graph: cumulative event count against the end of each 15-minute bin
library(ggplot2)

ggplot(events_df, aes(x = end, y = cumsum)) +
  geom_line()