我有一个存储在数据框中的活动日志,记录了座席打电话的时间:
Agent | Call Start | Call End
--------------------------------
Albert | 9:33:31 | 9:38:30
Albert | 9:40:33 | 9:44:36
Boris | 9:27:39 | 9:36:39
等
我想重新整理数据,以便查看一天中任意时间段内哪些座席处于占用或空闲状态:
From | To | Albert | Boris
-----------------------------------
9:00:01 | 9:27:38 | Free | Free
9:27:39 | 9:33:30 | Free | Occupied
9:33:31 | 9:36:39 | Occupied | Occupied
9:36:39 | 9:38:30 | Occupied | Free
等
如果可能的话,我希望使用 tidyverse 工具来完成此操作,
但任何可行的方法我都愿意接受!
答案 0 :(得分:0)
恐怕我对 tidyverse 不太熟悉,
不过下面的代码可以得到你想要的结果。
x
Agent Call_Start Call_End
1 Albert 9:33:31 9:38:30
2 Albert 9:40:33 9:44:36
3 Boris 9:27:39 9:36:39
# Build a From/To table of agent availability from the call log `x`
# (columns Agent, Call_Start, Call_End). Works for any number of agents.

# Parse the "H:M:S" strings into POSIXct. No date is supplied, so
# as.POSIXct() fills in the current date; every timestamp below shares it.
x$Call_Start <- as.POSIXct(x$Call_Start, format = "%H:%M:%S")
x$Call_End <- as.POSIXct(x$Call_End, format = "%H:%M:%S")

# Create the list of times to check: one entry per second of the window.
start <- as.POSIXct("09:27:00", format = "%H:%M:%S")
end <- as.POSIXct("09:45:00", format = "%H:%M:%S")
t <- seq.POSIXt(start, end, by = "sec")

# Create a data frame to hold the results: the time plus one status column
# per agent, in order of first appearance in the log.
agents <- as.character(unique(x$Agent))
agents <- agents[!is.na(agents)]
occ_dat <- data.frame(time = t)

# Check for each agent whether or not they were occupied at each time.
# Calls are looked up by agent *name*, so the column always matches the
# agent regardless of the order in which agents appear in the log.
for (agent in agents) {
  calls <- x[which(x$Agent == agent), , drop = FALSE]
  busy <- rep(FALSE, length(t))
  for (k in seq_len(nrow(calls))) {
    # which() ignores NA comparisons, so a call with an unparseable time
    # simply marks nothing as occupied.
    in_call <- t >= calls$Call_Start[k] & t <= calls$Call_End[k]
    busy[which(in_call)] <- TRUE
  }
  # If the agent is not occupied they are "free".
  occ_dat[[agent]] <- ifelse(busy, "occupied", "free")
}

# Collapse all times into non-overlapping periods by giving a new ID every
# time ANY agent's status differs from the previous second.
n_times <- nrow(occ_dat)
changed <- rep(FALSE, n_times)
for (agent in agents) {
  agent_status <- occ_dat[[agent]]
  changed[-1] <- changed[-1] | agent_status[-1] != agent_status[-n_times]
}
occ_dat$status <- cumsum(changed) + 1

# The first and last second of each status period give From and To; the
# agent columns are constant within a period, so take them from the first row.
first_row <- !duplicated(occ_dat$status)
last_row <- !duplicated(occ_dat$status, fromLast = TRUE)

# Some formatting: From, To, then one column per agent.
od3 <- occ_dat[first_row, c("time", "time", agents), drop = FALSE]
od3[[2]] <- occ_dat$time[last_row]
names(od3)[1:2] <- c("From", "To")
rownames(od3) <- NULL
od3
From To Albert Boris
1 2018-06-05 09:27:00 2018-06-05 09:27:38 free free
2 2018-06-05 09:27:39 2018-06-05 09:33:30 free occupied
3 2018-06-05 09:33:31 2018-06-05 09:36:39 occupied occupied
4 2018-06-05 09:36:40 2018-06-05 09:38:30 occupied free
5 2018-06-05 09:38:31 2018-06-05 09:40:32 free free
6 2018-06-05 09:40:33 2018-06-05 09:44:36 occupied free
7 2018-06-05 09:44:37 2018-06-05 09:45:00 free free
答案 1 :(得分:0)
好的——下面这个答案大量使用了 tidyverse 的包和函数,尤其是 tidyr 的 spread 和 gather 函数。
library(tidyr)
library(dplyr)
library(chron)
library(zoo)
# Build a From/To table (one row per interval between call events) showing
# whether each agent is "Occupied" or "Free" during that interval.

# Create sample activity log
Agent <- c(
  "Albert", "Albert", "Boris", "Boris", "Charlie", "Charlie", "Charlie"
)
# times() is from chron: time-of-day values without a date.
Call_Start <- times(c(
  "9:33:31", "9:40:33", "9:27:39", "9:41:23", "9:19:09", "9:35:28", "9:46:31"
))
Call_End <- times(c(
  "9:38:30", "9:44:36", "9:36:39", "9:42:29", "9:19:19", "9:45:28", "9:46:55"
))
Start <- data.frame(Agent, Call_Start, Call_End)

# Set the start and end times for the day
start_time <- times(c("09:00:01"))
end_time <- times(c("17:00:01"))

# Create the specific time breaks needed: every call start/end is a break.
# The i-th interval runs from Time_breaks_start[i] to Time_breaks_end[i].
Time_breaks_start <- sort(c(start_time, Start$Call_Start, Start$Call_End))
Time_breaks_end <- sort(c(Start$Call_Start, Start$Call_End, end_time))
Time_breaks <- data.frame(Time_breaks_start, Time_breaks_end)

# Use gather and spread to identify when agents start and end calls:
# one row per event time, one column per agent, holding the status the
# agent switches to at that time (NA when the agent has no event then).
Gathered <- Start %>%
  gather(
    key = "Start_End", value = "Time_breaks_start", Call_Start, Call_End
  ) %>%
  mutate(Start_End = ifelse(Start_End == "Call_Start", "Occupied", "Free"))
Spread <- Gathered %>%
  spread(key = Agent, value = Start_End)

# Join the per-event statuses onto the list of time breaks.
Output <- Time_breaks %>%
  left_join(Spread, by = "Time_breaks_start")

# Clean up missing values, one agent column at a time so Output stays a
# data frame and the time columns keep their class.
agent_cols <- setdiff(names(Output), names(Time_breaks))
Output[agent_cols] <- lapply(Output[agent_cols], function(status) {
  # Carry the last known status forward (na.locf is from zoo);
  # na.rm = FALSE keeps the leading NAs in place instead of dropping them.
  status <- na.locf(status, na.rm = FALSE)
  # Before an agent's first event of the day they are free.
  status[is.na(status)] <- "Free"
  status
})
Output