使用lubridate在任意时间段内查找间隔重叠

时间:2017-07-18 21:23:35

标签: r overlap lubridate

我的数据集包含任意划分的5分钟时间段,由Period列表示。不同观察对象(ID)的时间段起始时间不同(ID = '83'从49M开始,而ID = '90'从50M开始)。观察结果是以Start.N和End.N两列存储的时间区间。

最后这两列共同构成一个区间,表示一次观察的连续时间(即“事件”)。该区间与时间段无关,因此会在各行中重复出现。有些观察对象包含多个“事件”,此时同一时间段的行会重复,以容纳这些额外的区间。

我的目标是计算每个事件与各个任意划分的时间段之间重叠的分钟数和秒数。举例来说,第一行的重叠应为0M 0S,而第二行的重叠应为5M 0S,因为时间段54M 0S - 59M 0S落在事件54M 1S - 89M 0S之内。

  ID Period Period.start Period.end Start.N  End.N
1 83      5       49M 0S     54M 0S  54M 1S 89M 0S
2 83     10       54M 0S     59M 0S  54M 1S 89M 0S
3 83     15       59M 0S     64M 0S  54M 1S 89M 0S
4 83     20       64M 0S     69M 0S  54M 1S 89M 0S
5 83     25       69M 0S     74M 0S  54M 1S 89M 0S
6 83     30       74M 0S     79M 0S  54M 1S 89M 0S

这是我的数据

# Example data: a 30-row data.frame (looks like dput() output) with columns
# ID, Period, Period.start, Period.end, Start.N and End.N.
# The last four columns are lubridate Period objects: the unnamed numeric
# vector holds the seconds component, and the year/month/day/hour/minute
# attributes hold the remaining components (e.g. Start.N for ID "83" has
# seconds = 1 and minute = 54, i.e. 54M 1S).
# NOTE(review): the value is not assigned to a name here; the answer code
# further down refers to a data frame called new.data -- confirm that this is
# the object it is meant to be bound to.
structure(list(ID = c("83", "83", "83", "83", "83", "83", "83", 
"83", "83", "83", "90", "90", "90", "90", "90", "90", "90", "90", 
"90", "90", "90", "90", "90", "90", "90", "90", "90", "90", "90", 
"90"), Period = c(5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 5, 5, 
10, 10, 15, 15, 20, 20, 25, 25, 30, 30, 35, 35, 40, 40, 45, 45, 
50, 50), Period.start = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
), year = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month = c(0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0), day = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), hour = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0), minute = c(49, 54, 59, 64, 69, 74, 79, 
84, 89, 94, 50, 50, 55, 55, 60, 60, 65, 65, 70, 70, 75, 75, 80, 
80, 85, 85, 90, 90, 95, 95), class = structure("Period", package = "lubridate")), 
    Period.end = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), year = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month = c(0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0), day = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), hour = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0), minute = c(54, 59, 64, 69, 
    74, 79, 84, 89, 94, 99, 55, 55, 60, 60, 65, 65, 70, 70, 75, 
    75, 80, 80, 85, 85, 90, 90, 95, 95, 100, 100), class = structure("Period", package = "lubridate")), 
    Start.N = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 32, 32, 
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
    32, 32, 32), year = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L), month = c(0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), day = c(0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), hour = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), minute = c(54, 
    54, 54, 54, 54, 54, 54, 54, 54, 54, 52, 94, 52, 94, 52, 94, 
    52, 94, 52, 94, 52, 94, 52, 94, 52, 94, 52, 94, 52, 94), class = structure("Period", package = "lubridate")), 
    End.N = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), year = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), month = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), day = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), hour = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), minute = c(89, 
    89, 89, 89, 89, 89, 89, 89, 89, 89, 83, 111, 83, 111, 83, 
    111, 83, 111, 83, 111, 83, 111, 83, 111, 83, 111, 83, 111, 
    83, 111), class = structure("Period", package = "lubridate"))), .Names = c("ID", 
"Period", "Period.start", "Period.end", "Start.N", "End.N"), row.names = c(NA, 
30L), class = "data.frame")

1 个答案:

答案 0 :(得分:1)

这是我用来解决问题的方法。其中ifelse部分由eddi提供,之后的dplyr数据整理是我自己完成的。欢迎更有经验的程序员提出改进意见。

    # Add N.Time: the number of seconds each event [Start.N, End.N] overlaps
    # its bin [Period.start, Period.end].
    #
    # The lubridate Period columns are converted to plain seconds first, so
    # pmin()/pmax() work on ordinary numerics rather than S4 Period objects.
    #
    # The overlap of two intervals is min(ends) - max(starts), floored at 0
    # when they do not intersect. This is bounded by the bin length on its
    # own, so no hard-coded 300-second cap is needed. It is also correct for
    # an event that lies strictly inside a bin, where
    # pmin(End.N - Period.start, Period.end - Start.N) would over-count.
    #
    # The result is assigned back to new.data because the summarise step that
    # follows reads the N.Time column.
    new.data <- new.data %>%
      mutate(
        N.Time = pmax(
          0,
          pmin(
            lubridate::period_to_seconds(End.N),
            lubridate::period_to_seconds(Period.end)
          ) -
            pmax(
              lubridate::period_to_seconds(Start.N),
              lubridate::period_to_seconds(Period.start)
            )
        )
      )


  ## Reduce the data to one row per (ID, Period) bin.
  # An ID with more than one event has its Period rows repeated, so each
  # (ID, Period) group is collapsed to its largest N.Time value.
  new.data <- summarise(
    group_by(new.data, ID, Period),
    N.Time = max(N.Time)
  )