我有一个包含工时表(timesheet)数据的数据框,想找一种简单的方法对它进行汇总。我的数据类似 df1,我希望把它汇总成 df2 的形式。难点在于如何划分出每 6 小时一段的时间区间(增量),并把工时分摊到各个区间中。例如,ID 1 和 ID 3 的工时跨越了两个日期,这部分的分配最为棘手。
DF1
ID Garage Unit_Name START_DATE_TIME END_DATE_TIME
<chr> <chr> <chr> <dttm> <dttm>
1 A Truck 1/26/2015 21:00 1/27/2015 7:00
2 B Truck 5/13/2015 6:00 5/13/2015 16:00
3 C Car 8/21/2015 21:00 8/22/2015 7:00
6 C Car 8/21/2015 11:00 8/21/2015 21:00
structure(list(ID = c("<chr>", "1", "2", "3", "6", NA, NA, NA,
NA, NA, NA), Garage = c("<chr>", "A", "B", "C", "C", NA, NA,
NA, NA, NA, NA), Unit_Name = c("<chr>", "Truck", "Truck", "Car",
"Car", NA, NA, NA, NA, NA, NA), START_DATE_TIME = c("<dttm>",
"1/26/2015 21:00", "5/13/2015 6:00", "8/21/2015 21:00", "8/21/2015 11:00",
NA, NA, NA, NA, NA, NA), END_DATE_TIME = c("<dttm>", "1/27/2015 7:00",
"5/13/2015 16:00", "8/22/2015 7:00", "8/21/2015 21:00", NA, NA,
NA, NA, NA, NA)), .Names = c("ID", "Garage", "Unit_Name", "START_DATE_TIME",
"END_DATE_TIME"), row.names = c(NA, -11L), class = c("tbl_df",
"tbl", "data.frame"), spec = structure(list(cols = structure(list(
ID = structure(list(), class = c("collector_character", "collector"
)), Garage = structure(list(), class = c("collector_character",
"collector")), Unit_Name = structure(list(), class = c("collector_character",
"collector")), START_DATE_TIME = structure(list(), class = c("collector_character",
"collector")), END_DATE_TIME = structure(list(), class = c("collector_character",
"collector"))), .Names = c("ID", "Garage", "Unit_Name", "START_DATE_TIME",
"END_DATE_TIME")), default = structure(list(), class = c("collector_guess",
"collector"))), .Names = c("cols", "default"), class = "col_spec"))
DF2
Garage Unit_Name Date Increment Hours
<chr> <chr> <dttm> <chr> <dbl>
A Truck 1/26/2015 18:01-00:00 3
A Truck 1/27/2015 00:01-6:00 6
A Truck 1/27/2015 6:01-12:00 1
B Truck 5/13/2015 6:01-12:00 6
B Truck 5/13/2015 12:01-18:00 4
C Car 8/21/2015 6:01-12:00 1
C Car 8/21/2015 12:01-18:00 6
C Car 8/21/2015 18:01-00:00 6
C Car 8/22/2015 00:01-6:00 6
C Car 8/22/2015 6:01-12:00 1
答案 0 :(得分:1)
library(tidyverse)
library(lubridate)
# Labels of the four 6-hour increments of a day. `times1[b]` is the opening
# label of increment `b`; its closing label is the next entry of `times`,
# wrapping around to "00:00" for the last increment of the day.
times <- c("00:00", "06:00", "12:00", "18:00")
times1 <- c("00:01", "06:01", "12:01", "18:01")

# Split a span of `total` minutes that begins `start_min` minutes after
# midnight at every `bin`-minute clock boundary.
#
# Returns a tibble with one row per piece: `offset` is the piece's start in
# minutes from the start of the span and `mins` is its length. Missing or
# non-positive spans yield zero rows. Working in whole minutes keeps the
# integer division and modulo exact.
split_minutes <- function(total, start_min, bin = 360) {
  if (is.na(total) || total <= 0) {
    mins <- numeric(0)
  } else {
    # Minutes from the start of the span to the next clock boundary.
    lead <- bin - start_min %% bin
    if (total <= lead) {
      # The whole span fits inside a single increment.
      mins <- total
    } else {
      rest <- total - lead
      mins <- c(lead, rep(bin, rest %/% bin), rest %% bin)
      # Drop the empty trailing piece of a span ending exactly on a boundary.
      mins <- mins[mins > 0]
    }
  }
  tibble(offset = cumsum(mins) - mins, mins = mins)
}

# Summarise the timesheet into hours per garage, date and 6-hour increment.
# NOTE(review): like the original, all shifts of one garage/unit are collapsed
# into a single span from the earliest start to the latest end, so shifts are
# assumed to be contiguous -- confirm against the real data.
# The result stays grouped by Garage, as in the original.
df1 %>%
  # Parse before taking min/max: comparing the raw strings would order them
  # alphabetically ("10/1/2015" < "9/1/2015"), not chronologically.
  mutate(
    S = mdy_hm(START_DATE_TIME),
    E = mdy_hm(END_DATE_TIME)
  ) %>%
  # Drop rows whose timestamps are missing or could not be parsed.
  filter(!is.na(S), !is.na(E)) %>%
  group_by(Garage, Unit_Name) %>%
  summarise(S = min(S), E = max(E)) %>%
  mutate(
    # Explicit units: auto-selected difftime units may be days or minutes.
    total = as.numeric(difftime(E, S, units = "mins")),
    pieces = map2(total, hour(S) * 60 + minute(S), split_minutes)
  ) %>%
  unnest(pieces) %>%
  mutate(
    # POSIXct + numeric adds seconds.
    piece_start = S + 60 * offset,
    b = hour(piece_start) %/% 6 + 1,
    HOURS = mins / 60,
    increament = paste0(times1[b], "-", times[b %% 4 + 1]),
    Date = as.Date(piece_start)
  ) %>%
  select(Garage, Date, HOURS, increament)
Groups: Garage [3]
Garage Date HOURS increament
<chr> <date> <dbl> <chr>
1 A 2015-01-26 3. 18:01-00:00
2 A 2015-01-27 6. 00:01-06:00
3 A 2015-01-27 1. 06:01-12:00
4 B 2015-05-13 6. 06:01-12:00
5 B 2015-05-13 4. 12:01-18:00
6 C 2015-08-21 1. 06:01-12:00
7 C 2015-08-21 6. 12:01-18:00
8 C 2015-08-21 6. 18:01-00:00
9 C 2015-08-22 6. 00:01-06:00
10 C 2015-08-22 1. 06:01-12:00