我的数据集把时间划分为任意的5分钟时段,由Period
列表示。不同观察对象(IDs
)的时段起始时间不同(ID = '83'的时段从49M开始,而ID = '90'的时段从50M开始)。观察结果是时间区间,其起止时间分别存储在Start.N
和End.N
列中。
最后这两列给出的区间表示一次连续的观察时间,即一个“事件”;它与时段无关,因此在同一ID的各行中重复出现。有些观察对象有多个“事件”,这时每个时段的行会重复出现,以容纳这些额外的区间。
我的目标是计算每个事件与各个5分钟时段之间重叠的分钟数和秒数。举例来说,第一行的重叠应为0M 0S
,而第二行的重叠应为5M 0S
,因为时段54M 0S - 59M 0S
包含在事件54M 1S - 89M 0S
之内(严格来说事件从54M 1S开始,所以精确的重叠是4M 59S)。
ID Period Period.start Period.end Start.N End.N
1 83 5 49M 0S 54M 0S 54M 1S 89M 0S
2 83 10 54M 0S 59M 0S 54M 1S 89M 0S
3 83 15 59M 0S 64M 0S 54M 1S 89M 0S
4 83 20 64M 0S 69M 0S 54M 1S 89M 0S
5 83 25 69M 0S 74M 0S 54M 1S 89M 0S
6 83 30 74M 0S 79M 0S 54M 1S 89M 0S
这是我的数据
# dput()-style literal of the example data frame: 30 rows with the columns
# ID, Period, Period.start, Period.end, Start.N and End.N. ID is a character
# vector, Period is numeric, and the remaining four columns are lubridate
# "Period" objects. Note that the expression is not assigned to a name here.
structure(list(ID = c("83", "83", "83", "83", "83", "83", "83",
"83", "83", "83", "90", "90", "90", "90", "90", "90", "90", "90",
"90", "90", "90", "90", "90", "90", "90", "90", "90", "90", "90",
"90"), Period = c(5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 5, 5,
10, 10, 15, 15, 20, 20, 25, 25, 30, 30, 35, 35, 40, 40, 45, 45,
50, 50), Period.start = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
), year = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), day = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), hour = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), minute = c(49, 54, 59, 64, 69, 74, 79,
84, 89, 94, 50, 50, 55, 55, 60, 60, 65, 65, 70, 70, 75, 75, 80,
80, 85, 85, 90, 90, 95, 95), class = structure("Period", package = "lubridate")),
Period.end = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), year = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), month = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), day = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), hour = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), minute = c(54, 59, 64, 69,
74, 79, 84, 89, 94, 99, 55, 55, 60, 60, 65, 65, 70, 70, 75,
75, 80, 80, 85, 85, 90, 90, 95, 95, 100, 100), class = structure("Period", package = "lubridate")),
Start.N = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32), year = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L), month = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), day = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), hour = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), minute = c(54,
54, 54, 54, 54, 54, 54, 54, 54, 54, 52, 94, 52, 94, 52, 94,
52, 94, 52, 94, 52, 94, 52, 94, 52, 94, 52, 94, 52, 94), class = structure("Period", package = "lubridate")),
End.N = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), year = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), month = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), day = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), hour = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), minute = c(89,
89, 89, 89, 89, 89, 89, 89, 89, 89, 83, 111, 83, 111, 83,
111, 83, 111, 83, 111, 83, 111, 83, 111, 83, 111, 83, 111,
83, 111), class = structure("Period", package = "lubridate"))), .Names = c("ID",
"Period", "Period.start", "Period.end", "Start.N", "End.N"), row.names = c(NA,
30L), class = "data.frame")
答案 0 :(得分:1)
这是我用来解决问题的方法。其中的ifelse
写法由eddi提供,之后我用dplyr
做了数据整理。欢迎更有经验的程序员提出更高效的做法。
# Compute, for every row, the number of seconds during which the event
# [Start.N, End.N] overlaps the bin [Period.start, Period.end].
#
# The four time columns are lubridate Period objects. They are converted to
# plain seconds first so that pmin()/pmax() work on ordinary numeric vectors
# instead of relying on how S4 Period objects are coerced.
#
# The intersection of two intervals is min(ends) - max(starts). A negative
# value means the intervals do not overlap, so it is clamped to 0. The result
# can never exceed the bin length, so no separate 300-second cap is needed,
# and an event lying entirely inside a bin yields the event's own length.
#
# The result is assigned back to new.data so that the N.Time column exists
# for any later step that reads it.
new.data <- new.data %>%
  mutate(
    N.Time = pmax(
      0,
      pmin(lubridate::period_to_seconds(End.N),
           lubridate::period_to_seconds(Period.end)) -
        pmax(lubridate::period_to_seconds(Start.N),
             lubridate::period_to_seconds(Period.start))
    )
  )
## Last step: reduce the table to the columns of interest
# (ID, Period, N.Time), one row per ID and Period.
# Rows are duplicated whenever an ID has more than one N bout, so within
# each (ID, Period) pair only the largest N.Time is kept.
new.data <- summarise(
  group_by(new.data, ID, Period),
  N.Time = max(N.Time)
)