计算因子时如何获得等间隔?

时间:2016-11-01 12:22:02

标签: r

我有一些困难要创建一个30分钟休息的时间间隔,从整整00小时或整整00小时30分钟开始:

例如:

    library(reshape2)
    library(dplyr)

    # Given some data which resemble the original data
    foo <- data.frame(start.time = c("2012-02-01 13:47:00", 
                                     "2012-02-01 14:02:00",
                                     "2012-02-01 14:20:00",
                                     "2012-02-01 14:40:00",
                                     "2012-02-01 15:08:00",
                                     "2012-02-01 16:01:00",
                                     "2012-02-01 16:02:00",
                                     "2012-02-01 16:20:00",
                                     "2012-02-01 17:09:00",
                                     "2012-02-01 18:08:00",
                                     "2012-02-01 18:20:00",
                                     "2012-02-01 19:08:00"



                                     ),
                      employee   = c("mike","john","john","steven","mike","mike","mike","steven","mike","steven","mike","mike"))


                          start.time employee
            #1  2012-02-01 13:47:00     mike
            #2  2012-02-01 14:02:00     john
            #3  2012-02-01 14:20:00     john
            #4  2012-02-01 14:40:00   steven
            #5  2012-02-01 15:08:00     mike
            #6  2012-02-01 16:01:00     mike
            #7  2012-02-01 16:02:00     mike
            #8  2012-02-01 16:20:00   steven
            #9  2012-02-01 17:09:00     mike
            #10 2012-02-01 18:08:00   steven
            #11 2012-02-01 18:20:00     mike
            #12 2012-02-01 19:08:00     mike




    # change factor to POSIXct
    foo$start.time <- as.POSIXct(foo$start.time)
    # long to wide
    my_emp<- dcast(foo, start.time ~ employee, fun.aggregate = length)
    # 30 min breaks
    my_emp_ag<- my_emp %>% group_by(start.time = as.POSIXct(cut(start.time, breaks="30 min"))) %>% 
      summarize(john  = sum(john ),mike  = sum(mike ),steven = sum(steven))
    # Missing intervalls
    miss_interval <- data.frame(start.time=seq(from = min(as.POSIXct(my_emp$start.time)), to= max(as.POSIXct(my_emp$start.time)), by = "30 mins"))

    # join old woth new
    substitited <- left_join(miss_interval,my_emp_ag,by=c('start.time'))
    # change NA to zero
    substitited[is.na(substitited)] <- 0

                               start.time john mike steven
                  1  2012-02-01 13:47:00    1    1      0
                  2  2012-02-01 14:17:00    1    0      1
                  3  2012-02-01 14:47:00    0    1      0
                  4  2012-02-01 15:17:00    0    0      0
                  5  2012-02-01 15:47:00    0    2      0
                  6  2012-02-01 16:17:00    0    0      1
                  7  2012-02-01 16:47:00    0    1      0
                  8  2012-02-01 17:17:00    0    0      0
                  9  2012-02-01 17:47:00    0    0      1
                  10 2012-02-01 18:17:00    0    1      0
                  11 2012-02-01 18:47:00    0    1      0

几乎是所希望的2012-02-01 13:30:00 2012-02-01 14:00:00等等。

1 个答案:

答案 0 :(得分:1)

library(data.table)
library(lubridate)

setDT(foo)[, `:=` (
  round.time = {
    todate = ymd_hms(start.time)
    rounddate = floor_date(todate, "30 minutes")
  }
)]


            start.time employee          round.time
 1: 2012-02-01 13:47:00     mike 2012-02-01 13:30:00
 2: 2012-02-01 14:02:00     john 2012-02-01 14:00:00
 3: 2012-02-01 14:20:00     john 2012-02-01 14:00:00
 4: 2012-02-01 14:40:00   steven 2012-02-01 14:30:00
 5: 2012-02-01 15:08:00     mike 2012-02-01 15:00:00
 6: 2012-02-01 16:01:00     mike 2012-02-01 16:00:00
 7: 2012-02-01 16:02:00     mike 2012-02-01 16:00:00
 8: 2012-02-01 16:20:00   steven 2012-02-01 16:00:00
 9: 2012-02-01 17:09:00     mike 2012-02-01 17:00:00
10: 2012-02-01 18:08:00   steven 2012-02-01 18:00:00
11: 2012-02-01 18:20:00     mike 2012-02-01 18:00:00
12: 2012-02-01 19:08:00     mike 2012-02-01 19:00:00