Question

我想按如下方式调整时间戳变量：如果下一个活动的开始时间早于上一个活动的结束时间，则把上一个活动的开始时间和结束时间都更新为下一个活动开始时间的前 1 秒。

附加说明：

同一项工作（workID）中的活动可以重复出现，例如活动“A”。

某些单个活动的开始时间和结束时间并不相同。这是我有意为之，可以忽略这一点。

workID  workActivityID activity     status             timestamp      timestampDesired
     1               1        A      start   2018-01-01 09:55:01   2018-01-01 09:54:05
     1               1        A        end   2018-01-01 09:55:01   2018-01-01 09:54:05
     1               2        B      start   2018-01-01 09:54:06   2018-01-01 09:54:06
     1               2        B        end   2018-01-01 09:56:22   2018-01-01 09:56:22
     1               3        C      start   2018-01-01 09:57:22   2018-01-01 09:57:22
     1               3        C        end   2018-01-01 09:57:22   2018-01-01 09:57:22
     1               4        A      start   2018-02-02 08:35:00   2018-02-02 08:35:00
     1               4        A        end   2018-02-02 08:35:00   2018-02-02 08:35:00
     2               1        A      start   2018-02-02 08:13:55   2018-02-02 08:14:01
     2               1        A        end   2018-02-02 08:14:20   2018-02-02 08:14:01
     2               2        B      start   2018-02-02 08:14:02   2018-02-02 08:14:02
     2               2        B        end   2018-02-02 08:14:50   2018-02-02 08:14:50
     2               3        C      start   2018-02-02 10:00:00   2018-02-02 10:00:00
     2               3        C        end   2018-02-02 10:00:00   2018-02-02 10:00:00
     2               4        A      start   2018-02-02 10:22:00   2018-02-02 10:22:00
     2               4        A        end   2018-02-02 10:24:00   2018-02-02 10:24:00

数据：

library(lubridate)
# Sample data: two work items (workID), each with four activities, and one
# "start" row plus one "end" row per activity. `timestamp` holds the raw
# values; `timestampDesired` holds the values expected after the adjustment.
df <- data.frame(
  workID = rep(c(1, 2), each = 8),
  workActivityID = rep(rep(c(1, 2, 3, 4), each = 2), times = 2),
  activity = rep(rep(c("A", "B", "C", "A"), each = 2), times = 2),
  startEnd = rep(c("start", "end"), 8),
  # One line per activity: start, end.
  timestamp = ymd_hms(c(
    # workID 1
    "2018-01-01 09:55:01", "2018-01-01 09:55:01",
    "2018-01-01 09:54:06", "2018-01-01 09:56:22",
    "2018-01-01 09:57:22", "2018-01-01 09:57:22",
    "2018-02-02 08:35:00", "2018-02-02 08:35:00",
    # workID 2
    "2018-02-02 08:13:55", "2018-02-02 08:14:20",
    "2018-02-02 08:14:02", "2018-02-02 08:14:50",
    "2018-02-02 10:00:00", "2018-02-02 10:00:00",
    "2018-02-02 10:22:00", "2018-02-02 10:24:00"
  )),
  timestampDesired = ymd_hms(c(
    # workID 1
    "2018-01-01 09:54:05", "2018-01-01 09:54:05",
    "2018-01-01 09:54:06", "2018-01-01 09:56:22",
    "2018-01-01 09:57:22", "2018-01-01 09:57:22",
    "2018-02-02 08:35:00", "2018-02-02 08:35:00",
    # workID 2
    "2018-02-02 08:14:01", "2018-02-02 08:14:01",
    "2018-02-02 08:14:02", "2018-02-02 08:14:50",
    "2018-02-02 10:00:00", "2018-02-02 10:00:00",
    "2018-02-02 10:22:00", "2018-02-02 10:24:00"
  ))
)

Answer 1

一种可能的解决方案是使用 tidyr::spread 和 tidyr::gather。思路很简单：先把每个活动的 start 和 end 放到同一行（宽格式），这样判断是否需要修改以及执行修改都会更容易；修改完成后，再把数据转换回长格式。

library(tidyverse)

# Move an activity back when the next activity of the same workID starts
# before it ends: both its start and its end become (next start - 1 second).
# pivot_wider()/pivot_longer() are tidyr's current replacements for the
# superseded spread()/gather().
df %>%
  select(-timestampDesired) %>%
  # Wide format: one row per activity with `start` and `end` side by side.
  pivot_wider(names_from = startEnd, values_from = timestamp) %>%
  # lead() depends on row order, so make the activity order explicit.
  arrange(workID, workActivityID) %>%
  group_by(workID) %>%
  mutate(
    # Taken once from the unmodified `start`, so both updates below are driven
    # by the same condition and cannot disagree with each other.
    next_start = lead(start),
    # FALSE for the last activity of a workID (no next start) and for rows
    # with a missing end, so those rows are left untouched.
    overlaps = !is.na(next_start) & !is.na(end) & next_start < end,
    # Unlike ifelse(), if_else() keeps the POSIXct class and time zone, so no
    # round trip through as.POSIXct(..., origin = ...) is needed.
    start = if_else(overlaps, next_start - 1, start),
    end = if_else(overlaps, next_start - 1, end)
  ) %>%
  ungroup() %>%
  select(-next_start, -overlaps) %>%
  # Back to long format: one row per start/end event.
  pivot_longer(
    c(start, end),
    names_to = "startEnd",
    values_to = "timestamp"
  ) %>%
  # desc() puts "start" before "end" within each activity.
  arrange(workID, workActivityID, desc(startEnd)) %>%
  as.data.frame()

#      workID workActivityID activity startEnd           timestamp
# 1       1              1        A    start 2018-01-01 09:54:05
# 2       1              1        A      end 2018-01-01 09:54:05
# 3       1              2        B    start 2018-01-01 09:54:06
# 4       1              2        B      end 2018-01-01 09:56:22
# 5       1              3        C    start 2018-01-01 09:57:22
# 6       1              3        C      end 2018-01-01 09:57:22
# 7       1              4        A    start 2018-02-02 08:35:00
# 8       1              4        A      end 2018-02-02 08:35:00
# 9       2              1        A    start 2018-02-02 08:14:01
# 10      2              1        A      end 2018-02-02 08:14:01
# 11      2              2        B    start 2018-02-02 08:14:02
# 12      2              2        B      end 2018-02-02 08:14:50
# 13      2              3        C    start 2018-02-02 10:00:00
# 14      2              3        C      end 2018-02-02 10:00:00
# 15      2              4        A    start 2018-02-02 10:22:00
# 16      2              4        A      end 2018-02-02 10:24:00

Answer 2

这里给出一个 data.table 解决方案，说明见代码中的注释。

# Reshape to wide format: one row per activity with `start` and `end` columns.
wideDT <- dcast.data.table(DT, ... ~ startEnd, value.var = "timestamp")

# Within each workID, look ahead to the next activity's start time. When it is
# earlier than the current end, both new timestamps become (next start - 1 s);
# otherwise the original start and end are kept.
wideDT[, c("newstart", "newend") := {
  # The last activity has no successor; filling with max(end) makes the
  # comparison below FALSE for it, so it stays unchanged.
  nextStart <- shift(start, type = "lead", fill = max(end))
  moveBack <- nextStart < end
  # ifelse() returns plain numbers, hence the conversion back to POSIXct.
  list(
    newstart = as.POSIXct(
      ifelse(moveBack, nextStart - 1, start),
      origin = "1970-01-01"
    ),
    newend = as.POSIXct(
      ifelse(moveBack, nextStart - 1, end),
      origin = "1970-01-01"
    )
  )
}, by = .(workID)]

# Back to the long layout the OP asked for: two rows (start, end) per activity
# carrying the original and the adjusted timestamp.
# NOTE(review): this row-wise join presumably relies on the key that dcast
# sets on the id columns -- confirm before reusing on an unkeyed table.
wideDT[
  .(workID, workActivityID, activity),
  list(
    startend = c("start", "end"),
    timestamp = c(start, end),
    timestampDesired = c(newstart, newend)
  ),
  by = .EACHI
]

数据：

library(data.table)
# Sample data as a data.table: two work items (workID), each with four
# activities, and one "start" row plus one "end" row per activity.
DT <- data.table(
  workID = rep(c(1, 2), each = 8),
  workActivityID = rep(rep(c(1, 2, 3, 4), each = 2), times = 2),
  activity = rep(rep(c("A", "B", "C", "A"), each = 2), times = 2),
  startEnd = rep(c("start", "end"), 8),
  # One line per activity: start, end.
  timestamp = as.POSIXct(c(
    # workID 1
    "2018-01-01 09:55:01", "2018-01-01 09:55:01",
    "2018-01-01 09:54:06", "2018-01-01 09:56:22",
    "2018-01-01 09:57:22", "2018-01-01 09:57:22",
    "2018-02-02 08:35:00", "2018-02-02 08:35:00",
    # workID 2
    "2018-02-02 08:13:55", "2018-02-02 08:14:20",
    "2018-02-02 08:14:02", "2018-02-02 08:14:50",
    "2018-02-02 10:00:00", "2018-02-02 10:00:00",
    "2018-02-02 10:22:00", "2018-02-02 10:24:00"
  ))
)

R：根据下一条记录的值更新时间戳

2 个答案: