dplyr按组汇总,同时按日期和增量分配小时数

时间:2018-06-12 23:01:46

标签: r datetime dplyr mutate summarization

我有一个包含工时表数据的 df,正在寻找一种简单的方法对它进行汇总。我的数据形如 df1,我想把它汇总成 df2。我不知道该如何划分时间增量(每 6 小时一段)并把工作小时数分配到各个增量中。例如,棘手的部分是 ID 1 和 ID 3 这类跨越午夜(跨日期)的记录,它们的小时数需要分配到两天的不同增量中。

DF1

ID    Garage  Unit_Name START_DATE_TIME  END_DATE_TIME
<chr> <chr>   <chr>     <dttm>           <dttm>
1     A       Truck     1/26/2015 21:00  1/27/2015 7:00
2     B       Truck     5/13/2015 6:00   5/13/2015 16:00
3     C       Car       8/21/2015 21:00  8/22/2015 7:00
6     C       Car       8/21/2015 11:00  8/21/2015 21:00


# dput() dump of the question's df1: an 11-row tibble whose five columns are
# all character vectors (the timestamps are "m/d/Y H:M" strings, not parsed
# date-times).
# NOTE(review): the first element of every column is the printed type tag
# ("<chr>" / "<dttm>") and the last six elements are NA, so only elements
# 2-5 are real records -- presumably an artifact of how the table was read
# in; drop those rows before use.
structure(list(ID = c("<chr>", "1", "2", "3", "6", NA, NA, NA, 
NA, NA, NA), Garage = c("<chr>", "A", "B", "C", "C", NA, NA, 
NA, NA, NA, NA), Unit_Name = c("<chr>", "Truck", "Truck", "Car", 
"Car", NA, NA, NA, NA, NA, NA), START_DATE_TIME = c("<dttm>", 
"1/26/2015 21:00", "5/13/2015 6:00", "8/21/2015 21:00", "8/21/2015 11:00", 
NA, NA, NA, NA, NA, NA), END_DATE_TIME = c("<dttm>", "1/27/2015 7:00", 
"5/13/2015 16:00", "8/22/2015 7:00", "8/21/2015 21:00", NA, NA, 
NA, NA, NA, NA)), .Names = c("ID", "Garage", "Unit_Name", "START_DATE_TIME", 
"END_DATE_TIME"), row.names = c(NA, -11L), class = c("tbl_df", 
"tbl", "data.frame"), spec = structure(list(cols = structure(list(
    ID = structure(list(), class = c("collector_character", "collector"
    )), Garage = structure(list(), class = c("collector_character", 
    "collector")), Unit_Name = structure(list(), class = c("collector_character", 
    "collector")), START_DATE_TIME = structure(list(), class = c("collector_character", 
    "collector")), END_DATE_TIME = structure(list(), class = c("collector_character", 
    "collector"))), .Names = c("ID", "Garage", "Unit_Name", "START_DATE_TIME", 
"END_DATE_TIME")), default = structure(list(), class = c("collector_guess", 
"collector"))), .Names = c("cols", "default"), class = "col_spec"))

DF2

Garage  Unit_Name   Date        Increment    Hours
<chr>   <chr>       <dttm>      <chr>        <dbl>
A       Truck       1/26/2015   18:01-00:00   3
A       Truck       1/27/2015   00:01-6:00    6
A       Truck       1/27/2015   6:01-12:00    1
B       Truck       5/13/2015   6:01-12:00    6
B       Truck       5/13/2015   12:01-18:00   4
C       Car         8/21/2015   6:01-12:00    1
C       Car         8/21/2015   12:01-18:00   6
C       Car         8/21/2015   18:01-00:00   6
C       Car         8/22/2015   00:01-6:00    6
C       Car         8/22/2015   6:01-12:00    1 

1 个答案:

答案 0 :(得分:1)

library(tidyverse)
library(lubridate)
# Allocate the hours worked by each Garage / Unit_Name to 6-hour increments
# (00:01-06:00, 06:01-12:00, 12:01-18:00, 18:01-00:00), one output row per
# date and increment.
#
# Input : df1 with character columns START_DATE_TIME / END_DATE_TIME in
#         "m/d/Y H:M" form, plus Garage and Unit_Name.
# Output: tibble grouped by Garage with columns Garage, Date, HOURS,
#         increament (column name kept as spelled in the original answer).
#
# NOTE(review): every group is collapsed to ONE span running from its
# earliest start to its latest end, so the rows of a group are assumed to be
# back-to-back (as IDs 3 and 6 are in the example) -- gaps between shifts
# would be counted as worked hours. Confirm against the real data.

# Labels of the four increments: times1[b] opens increment b, and the label
# that closes it is the start of the following increment (wrapping to 00:00).
times <- c("00:00", "06:00", "12:00", "18:00")
times1 <- c("00:01", "06:01", "12:01", "18:01")

df1 %>%
  # Parse BEFORE aggregating: min()/max() on the raw strings compare
  # lexicographically (e.g. "10/1/2015" sorts before "9/1/2015").
  mutate(START_DATE_TIME = mdy_hm(START_DATE_TIME),
         END_DATE_TIME = mdy_hm(END_DATE_TIME)) %>%
  # Rows whose timestamps did not parse (header residue, all-NA padding)
  # cannot be allocated to any increment.
  filter(!is.na(START_DATE_TIME), !is.na(END_DATE_TIME)) %>%
  group_by(Garage, Unit_Name) %>%
  summarise(S = min(START_DATE_TIME),
            E = max(END_DATE_TIME)) %>%
  # summarise() leaves the data grouped by Garage only; the steps below are
  # per row, so drop the grouping.
  ungroup() %>%
  mutate(
    # Index (1-4) of the increment that contains the span start.
    b = hour(S) %/% 6 + 1,
    # End of that increment; for b == 4 this is midnight of the next day.
    boundary = floor_date(S, "day") + dhours(6 * b),
    # Fixed units: a bare POSIXct subtraction picks mins/hours/days on its
    # own, which silently changes what as.numeric() returns.
    to_boundary = as.numeric(difftime(boundary, S, units = "hours")),
    total = as.numeric(difftime(E, S, units = "hours")),
    # Hours in the first (partial) increment, capped when the whole span
    # ends before the boundary is reached.
    first_part = pmin(to_boundary, total),
    # Hours left after the first boundary (never negative).
    rem = pmax(total - to_boundary, 0),
    # Per span: first partial chunk, then full 6-hour chunks, then the tail.
    HOURS = map2(first_part, rem, ~ c(.x, rep(6, .y %/% 6), .y %% 6))
  ) %>%
  unnest(HOURS) %>%
  # Running offsets must restart for every span, not for every Garage.
  group_by(Garage, Unit_Name) %>%
  mutate(
    # Each chunk starts where the previous chunks of the same span end.
    chunk_start = S + dhours(cumsum(lag(HOURS, default = 0))),
    b = hour(chunk_start) %/% 6 + 1,
    increament = paste0(times1[b], "-", times[b %% 4 + 1]),
    Date = as.Date(chunk_start)
  ) %>%
  # A span ending exactly on a boundary yields an empty trailing chunk.
  filter(HOURS > 0) %>%
  # Keep the result shape of the original answer: grouped by Garage only.
  group_by(Garage) %>%
  select(Garage, Date, HOURS, increament)

Groups:   Garage [3]
   Garage Date       HOURS increament 
   <chr>  <date>     <dbl> <chr>      
 1 A      2015-01-26    3. 18:01-00:00
 2 A      2015-01-27    6. 00:01-06:00
 3 A      2015-01-27    1. 06:01-12:00
 4 B      2015-05-13    6. 06:01-12:00
 5 B      2015-05-13    4. 12:01-18:00
 6 C      2015-08-21    1. 06:01-12:00
 7 C      2015-08-21    6. 12:01-18:00
 8 C      2015-08-21    6. 18:01-00:00
 9 C      2015-08-22    6. 00:01-06:00
10 C      2015-08-22    1. 06:01-12:00