我想按以下规则调整时间戳变量:如果下一个活动的开始时间早于上一个活动的结束时间,则将上一个活动的开始时间和结束时间都更新为下一个活动开始时间的前 1 秒。
附加说明:
同一项工作(workID)中的活动可以重复出现,例如活动“A”。
某些单个活动的开始时间和结束时间并不相同。这是我有意为之,可以忽略这一点。
workID workActivityID activity startEnd timestamp timestampDesired
1 1 A start 2018-01-01 09:55:01 2018-01-01 09:54:05
1 1 A end 2018-01-01 09:55:01 2018-01-01 09:54:05
1 2 B start 2018-01-01 09:54:06 2018-01-01 09:54:06
1 2 B end 2018-01-01 09:56:22 2018-01-01 09:56:22
1 3 C start 2018-01-01 09:57:22 2018-01-01 09:57:22
1 3 C end 2018-01-01 09:57:22 2018-01-01 09:57:22
1 4 A start 2018-02-02 08:35:00 2018-02-02 08:35:00
1 4 A end 2018-02-02 08:35:00 2018-02-02 08:35:00
2 1 A start 2018-02-02 08:13:55 2018-02-02 08:14:01
2 1 A end 2018-02-02 08:14:20 2018-02-02 08:14:01
2 2 B start 2018-02-02 08:14:02 2018-02-02 08:14:02
2 2 B end 2018-02-02 08:14:50 2018-02-02 08:14:50
2 3 C start 2018-02-02 10:00:00 2018-02-02 10:00:00
2 3 C end 2018-02-02 10:00:00 2018-02-02 10:00:00
2 4 A start 2018-02-02 10:22:00 2018-02-02 10:22:00
2 4 A end 2018-02-02 10:24:00 2018-02-02 10:24:00
数据:
library(lubridate)
# Sample data in long format: one row per activity boundary ("start"/"end").
# `timestamp` is the recorded value; `timestampDesired` is the value the
# question expects after the overlap correction has been applied.
df <- data.frame(
  workID = rep(c(1, 2), each = 8),
  workActivityID = rep(c(1, 2, 3, 4), each = 2, times = 2),
  activity = rep(c("A", "B", "C", "A"), each = 2, times = 2),
  startEnd = rep(c("start", "end"), times = 8),
  timestamp = ymd_hms(c(
    # workID 1: activities A, B, C, A (start, end)
    "2018-01-01 09:55:01", "2018-01-01 09:55:01",
    "2018-01-01 09:54:06", "2018-01-01 09:56:22",
    "2018-01-01 09:57:22", "2018-01-01 09:57:22",
    "2018-02-02 08:35:00", "2018-02-02 08:35:00",
    # workID 2: activities A, B, C, A (start, end)
    "2018-02-02 08:13:55", "2018-02-02 08:14:20",
    "2018-02-02 08:14:02", "2018-02-02 08:14:50",
    "2018-02-02 10:00:00", "2018-02-02 10:00:00",
    "2018-02-02 10:22:00", "2018-02-02 10:24:00"
  )),
  timestampDesired = ymd_hms(c(
    # workID 1: activities A, B, C, A (start, end)
    "2018-01-01 09:54:05", "2018-01-01 09:54:05",
    "2018-01-01 09:54:06", "2018-01-01 09:56:22",
    "2018-01-01 09:57:22", "2018-01-01 09:57:22",
    "2018-02-02 08:35:00", "2018-02-02 08:35:00",
    # workID 2: activities A, B, C, A (start, end)
    "2018-02-02 08:14:01", "2018-02-02 08:14:01",
    "2018-02-02 08:14:02", "2018-02-02 08:14:50",
    "2018-02-02 10:00:00", "2018-02-02 10:00:00",
    "2018-02-02 10:22:00", "2018-02-02 10:24:00"
  ))
)
答案 0 :(得分:1)
一种可能的解决方案是使用 `tidyr::spread` 和 `tidyr::gather`。这种方法的思路很简单:先把数据转换为宽格式,使每个活动的 `start` 和 `end` 位于同一行,这样判断是否需要修改以及执行修改都会更容易;修改完成后,再把数据转换回长格式。
library(tidyverse)
# Resolve overlaps between consecutive activities of the same workID: when the
# next activity starts before the current one ends, both the start and the end
# of the current activity are set to one second before the next start.
# Returns a long-format data.frame with one "start" and one "end" row per
# activity, "start" first.
df %>%
  select(-timestampDesired) %>%
  # Wide format: one row per activity with `start` and `end` side by side.
  spread(startEnd, timestamp) %>%
  group_by(workID) %>%
  mutate(
    # Derive the comparison inputs once from the original columns, so that
    # rewriting `start` below cannot change how `end` is decided.
    next_start = lead(start),
    # The is.na() guards make the flag FALSE (never NA) for the last activity
    # of a group and for activities with a missing end.
    overlaps = !is.na(next_start) & !is.na(end) & next_start < end,
    # if_else() keeps the POSIXct class and time zone; ifelse() would return
    # bare numbers and the conversion back would lose the original time zone.
    start = if_else(overlaps, next_start - 1, start),
    end = if_else(overlaps, next_start - 1, end)
  ) %>%
  ungroup() %>%
  select(-next_start, -overlaps) %>%
  # Back to long format; desc() puts "start" ahead of "end" for each activity.
  gather("startEnd", "timestamp", c("start", "end")) %>%
  arrange(workID, workActivityID, desc(startEnd)) %>%
  as.data.frame()
# workID workActivityID activity startEnd timestamp
# 1 1 1 A start 2018-01-01 09:54:05
# 2 1 1 A end 2018-01-01 09:54:05
# 3 1 2 B start 2018-01-01 09:54:06
# 4 1 2 B end 2018-01-01 09:56:22
# 5 1 3 C start 2018-01-01 09:57:22
# 6 1 3 C end 2018-01-01 09:57:22
# 7 1 4 A start 2018-02-02 08:35:00
# 8 1 4 A end 2018-02-02 08:35:00
# 9 2 1 A start 2018-02-02 08:14:01
# 10 2 1 A end 2018-02-02 08:14:01
# 11 2 2 B start 2018-02-02 08:14:02
# 12 2 2 B end 2018-02-02 08:14:50
# 13 2 3 C start 2018-02-02 10:00:00
# 14 2 3 C end 2018-02-02 10:00:00
# 15 2 4 A start 2018-02-02 10:22:00
# 16 2 4 A end 2018-02-02 10:24:00
答案 1 :(得分:1)
这里补充一个 data.table 解决方案,说明见代码中的注释。
# Reshape to wide format: one row per activity with `start` and `end` columns.
wideDT <- dcast.data.table(DT, ... ~ startEnd, value.var = "timestamp")

# Per workID, compare each activity's end with the start of the activity that
# follows it; where the next start is earlier, both new timestamps become one
# second before that next start, otherwise the original values are kept.
wideDT[, c("newstart", "newend") := {
  # The last row of a group has no successor, so max(end) is used as its fill.
  nextStart <- shift(start, type = "lead", fill = max(end))
  overlaps <- nextStart < end
  # ifelse() returns plain numbers, hence the conversion back to POSIXct.
  list(
    newstart = as.POSIXct(
      ifelse(overlaps, nextStart - 1, start),
      origin = "1970-01-01"
    ),
    newend = as.POSIXct(
      ifelse(overlaps, nextStart - 1, end),
      origin = "1970-01-01"
    )
  )
}, by = .(workID)]

# Back to the long layout requested in the question: for every activity emit a
# "start" row and an "end" row holding the original and the adjusted timestamp.
# NOTE(review): the self-join in `i` presumably relies on wideDT being keyed by
# these three columns after the dcast step -- confirm.
wideDT[
  .(workID, workActivityID, activity),
  list(
    startend = c("start", "end"),
    timestamp = c(start, end),
    timestampDesired = c(newstart, newend)
  ),
  by = .EACHI
]
数据:
library(data.table)
# Sample data in long format: one row per activity boundary ("start"/"end").
# Timestamps are parsed by as.POSIXct() without an explicit time zone.
DT <- data.table(
  workID = rep(c(1, 2), each = 8),
  workActivityID = rep(c(1, 2, 3, 4), each = 2, times = 2),
  activity = rep(c("A", "B", "C", "A"), each = 2, times = 2),
  startEnd = rep(c("start", "end"), times = 8),
  timestamp = as.POSIXct(c(
    # workID 1: activities A, B, C, A (start, end)
    "2018-01-01 09:55:01", "2018-01-01 09:55:01",
    "2018-01-01 09:54:06", "2018-01-01 09:56:22",
    "2018-01-01 09:57:22", "2018-01-01 09:57:22",
    "2018-02-02 08:35:00", "2018-02-02 08:35:00",
    # workID 2: activities A, B, C, A (start, end)
    "2018-02-02 08:13:55", "2018-02-02 08:14:20",
    "2018-02-02 08:14:02", "2018-02-02 08:14:50",
    "2018-02-02 10:00:00", "2018-02-02 10:00:00",
    "2018-02-02 10:22:00", "2018-02-02 10:24:00"
  ))
)