我试图找出如何根据到达和通话时间设置队列。基本上我想要一个列,显示当时排队的人数。我希望能够在一个人到达时添加1并在呼叫一个人时减去1。关于如何做到这一点的任何想法?优选地,不依赖于循环。以下是数据的前20行:
df[1:20,]
Date_of_Service Ticket Arrival_Time Call_Time
1 09/01/2015 1 6:40:04 7:31:09
2 09/01/2015 2 6:59:14 7:32:24
3 09/01/2015 3 6:59:36 7:33:47
4 09/01/2015 4 7:00:16 7:30:04
5 09/01/2015 5 7:11:10 7:35:34
6 09/01/2015 6 7:11:55 7:36:51
7 09/01/2015 7 7:17:09 7:30:48
8 09/01/2015 8 7:21:33 7:37:38
9 09/01/2015 9 7:21:53 7:34:39
10 09/01/2015 10 7:22:28 7:38:32
11 09/01/2015 11 7:24:18 7:38:40
12 09/01/2015 12 7:25:08 7:39:55
13 09/01/2015 13 7:26:18 7:40:14
14 09/01/2015 14 7:26:34 7:42:21
15 09/01/2015 15 7:30:09 7:36:22
16 09/01/2015 16 7:30:24 7:42:36
17 09/01/2015 17 7:39:47 7:43:20
18 09/01/2015 18 7:46:20 7:47:22
19 09/01/2015 19 7:46:36 7:47:30
20 09/01/2015 20 7:48:46 7:49:44
我将数据转换为POSIXlt并试图运行一个不起作用的恶意循环:
arrival <- paste(as.character(df$Date_of_Service), as.character(df$Arrival_Time))
call <- paste(as.character(df$Date_of_Service), as.character(df$Call_Time))
arrival <- as.POSIXlt(arrival, tz="", format="%m/%d/%Y %H:%M:%S")
call <- as.POSIXlt(call, tz="", format="%m/%d/%Y %H:%M:%S")
queue <- rep(0, length(arrival))
queue[1] <- 1
x <- 1
y <- 2
while(x < 1+length(call)){
while(y < 1+length(arrival)){
ifelse(difftime(call[x], arrival[y], units="secs") > 0,
queue[y] <- queue[y-1] + 1,
queue[y] <- queue[y-1] - 1)
y <- y+1
}
x <- x+1
}
有什么建议吗?
答案 0 :(得分:3)
以下是使用sapply
的答案。
我要在数据框上放置到达和调用列:
df$arrival <- paste(as.character(df$Date_of_Service), as.character(df$Arrival_Time))
df$call <- paste(as.character(df$Date_of_Service), as.character(df$Call_Time))
df$arrival <- as.POSIXlt(df$arrival, tz="", format="%m/%d/%Y %H:%M:%S")
df$call <- as.POSIXlt(df$call, tz="", format="%m/%d/%Y %H:%M:%S")
然后使用sapply
,我们会发现calls
的总时间早于每个arrival
(即已经回答的时间),并减去总呼叫数量:{ {1}}
1:nrow(df)
答案 1 :(得分:3)
您可以使用cumsum
n <- 1000
start <- as.POSIXct("2016-01-01 7:30")
end <- as.POSIXct("2016-01-01 16:30")
arrival <- sort(as.POSIXct(runif(n, start, end), origin = "1970-1-1"))
waiting <- runif(n, 60, 3600)
call <- arrival + waiting
rawdata <- data.frame(
ticket = seq_len(n),
arrival,
call
)
library(dplyr)
queue <- rawdata %>%
transmute(time = arrival, change = 1) %>%
bind_rows(
rawdata %>%
transmute(time = call, change = -1)
) %>%
arrange(time) %>%
mutate(queue = cumsum(change))
library(ggplot2)
ggplot(queue, aes(x = time, y = queue)) + geom_point()
以下是两种方法之间的时间比较
library(microbenchmark)
microbenchmark(
jeremycg = rawdata$queue <- 1:nrow(rawdata) - sapply(rawdata$arrival, function(x){sum(x > rawdata$call)}),
thierry = {
queue <- rawdata %>%
transmute(time = arrival, change = 1) %>%
bind_rows(
rawdata %>%
transmute(time = call, change = -1)
) %>%
arrange(time) %>%
mutate(queue = cumsum(change))
}
)
n = 1000
Unit: milliseconds
expr min lq mean median uq max neval cld
jeremycg 72.116199 75.185721 78.901888 78.772244 81.266603 114.272287 100 b
thierry 4.512768 4.673343 5.049725 4.886944 5.065051 7.354791 100 a
n = 2000
的结果。请注意,排序解决方案可以更好地扩展。排序现在快了大约28倍。
Unit: milliseconds
expr min lq mean median uq max neval cld
jeremycg 123.12036 131.24167 140.620648 140.448737 148.017769 189.783860 100 b
thierry 4.48925 4.72359 5.067656 4.890579 5.131761 7.064431 100 a
n = 10000
Unit: milliseconds
expr min lq mean median uq max neval cld
jeremycg 1167.0648 1185.2014 1210.70674 1205.39125 1231.09153 1304.36404 100 b
thierry 14.9901 15.3119 16.01322 15.72178 16.53855 18.15884 100 a
排序明显更好