我的数据框有 Entry Time 和 Exit Time 两列。我想得到一列 Stay Duration,即当前行的退出时间与下一行的进入时间之间的差值。数据框已按日期和进入时间排序。
第一行的停留时间为:7:34:36 - 6:54:50 = 0:39:46(下一行的进入时间减去当前行的退出时间)
我遇到的问题出现在以下情况:
当某一行的 Exit_Time 为 22:15:05,而下一行的 Entry_Time 为 6:02:46 时(即跨越了午夜)。在这种情况下,我需要 Stay Duration 为 7:47:41。
dput(df)
structure(list(JRNY_ID_NUM = c(115492027250, 115492027250, 115523231209,
115523231209, 115526742250, 115526742250, 115509240124, 115509240124,
115539253765, 115539253765, 115570245886, 115567046025, 115562452408,
115562452408, 115574565032, 115574565032), BIZ_DT = structure(c(1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("2017-01-01",
"2017-01-02", "2017-01-03", "2017-01-04", "2017-01-05", "2017-01-06",
"2017-01-07", "2017-01-09", "2017-01-10", "2017-01-11", "2017-01-12",
"2017-01-13", "2017-01-14", "2017-01-15", "2017-01-16", "2017-01-17",
"2017-01-18", "2017-01-19", "2017-01-20", "2017-01-21", "2017-01-22",
"2017-01-23", "2017-01-24", "2017-01-25", "2017-01-26", "2017-01-27",
"2017-01-31"), class = "factor"), ENTRY_TM = structure(c(37L,
41L, 45L, 46L, 8L, 25L, 52L, 73L, 5L, 15L, 56L, 89L, 29L, 33L,
63L, 77L), .Label = c("05:30:39", "05:32:07", "05:32:33", "05:32:38",
"05:32:50", "05:32:59", "05:33:06", "05:37:14", "05:37:58", "05:38:34",
"05:38:38", "05:40:22", "05:40:49", "05:41:16", "05:42:27", "05:47:17",
"05:48:03", "05:48:13", "05:48:54", "05:49:15", "05:50:17", "05:51:42",
"05:52:30", "05:53:20", "05:54:40", "05:56:24", "05:57:59", "06:00:11",
"06:02:46", "06:03:28", "06:05:44", "06:32:18", "06:40:32", "06:40:40",
"06:42:35", "06:45:51", "06:45:55", "06:52:49", "06:57:25", "07:03:49",
"07:34:36", "08:26:43", "09:16:34", "10:16:10", "12:21:51", "13:36:40",
"15:29:30", "16:07:03", "16:10:49", "16:13:51", "16:15:04", "16:29:20",
"16:47:49", "16:48:42", "16:55:50", "16:56:27", "16:58:53", "17:01:02",
"17:03:31", "17:06:19", "17:09:03", "17:11:22", "17:12:15", "17:12:57",
"17:15:11", "17:16:56", "17:21:07", "17:22:18", "17:22:22", "17:23:53",
"17:28:37", "17:30:17", "17:30:24", "17:31:21", "17:32:22", "17:59:07",
"18:16:25", "18:17:13", "18:23:36", "18:27:40", "18:44:43", "18:46:36",
"18:53:21", "20:55:32", "21:06:00", "21:07:08", "21:18:10", "21:18:21",
"21:42:25", "21:43:45", "23:31:38"), class = "factor"), EXIT_TM = structure(c(34L,
37L, 45L, 46L, 9L, 27L, 54L, 60L, 7L, 20L, 71L, 88L, 25L, 40L,
68L, 72L), .Label = c("?", "05:37:56", "05:39:50", "05:39:51",
"05:39:53", "05:40:03", "05:40:51", "05:41:01", "05:43:02", "05:44:51",
"05:45:04", "05:45:45", "05:56:35", "05:57:45", "05:58:58", "06:03:06",
"06:21:39", "06:23:35", "06:24:30", "06:24:58", "06:28:36", "06:29:17",
"06:29:25", "06:32:11", "06:32:15", "06:34:01", "06:36:28", "06:41:02",
"06:41:05", "06:43:31", "06:44:44", "06:51:05", "06:51:46", "06:54:50",
"07:07:55", "07:39:39", "07:43:21", "07:48:49", "08:05:22", "08:17:58",
"08:18:15", "08:32:12", "09:34:23", "10:31:51", "13:17:38", "13:46:19",
"16:08:29", "16:26:21", "16:26:50", "16:37:08", "17:09:13", "17:25:49",
"17:26:31", "17:26:50", "17:27:35", "17:28:20", "17:31:16", "17:34:43",
"17:35:16", "17:36:37", "17:42:09", "17:42:23", "18:00:27", "18:06:53",
"18:08:38", "18:09:18", "18:13:02", "18:14:35", "18:20:15", "18:22:06",
"18:23:17", "18:25:18", "18:25:30", "18:28:47", "18:30:11", "18:30:54",
"18:33:31", "18:38:49", "18:41:19", "18:52:25", "19:05:37", "19:27:49",
"21:06:21", "21:41:28", "21:47:13", "21:53:35", "21:54:29", "22:15:05",
"22:25:41", "23:59:35"), class = "factor")), .Names = c("JRNY_ID_NUM",
"BIZ_DT", "ENTRY_TM", "EXIT_TM"), row.names = c(160L, 73L, 51L,
145L, 111L, 56L, 119L, 157L, 168L, 131L, 81L, 78L, 135L, 35L,
165L, 25L), class = "data.frame")
我想要获得的输出是:
JRNY_ID_NUM BIZ_DT ENTRY_TM EXIT_TM Stay_Duration
115492027250 1/1/2017 6:45:55 6:54:50 0:39:46
115492027250 1/1/2017 7:34:36 7:43:21 4:38:30
115523231209 1/1/2017 12:21:51 13:17:38 0:19:02
115523231209 1/1/2017 13:36:40 13:46:19 15:50:55
115526742250 1/2/2017 5:37:14 5:43:02 0:11:38
115526742250 1/2/2017 5:54:40 6:36:28 9:52:52
115509240124 1/2/2017 16:29:20 17:26:50 0:03:34
115509240124 1/2/2017 17:30:24 17:36:37 11:56:13
115539253765 1/3/2017 5:32:50 5:40:51 0:01:36
115539253765 1/3/2017 5:42:27 6:24:58 10:31:29
115570245886 1/3/2017 16:56:27 18:23:17 3:19:08
115567046025 1/3/2017 21:42:25 22:15:05 7:47:41
115562452408 1/4/2017 6:02:46 6:32:15 0:08:17
115562452408 1/4/2017 6:40:32 8:17:58 8:54:17
115574565032 1/4/2017 17:12:15 18:14:35 0:01:50
115574565032 1/4/2017 18:16:25 18:25:18
答案 0 :(得分:1)
更新
尝试:
require(dplyr)
# Format a duration given in seconds as a signed "HH:MM:SS" string.
#
# x: numeric vector of durations in seconds (may be negative or NA).
# Returns a character vector the same length as x. Negative durations get a
# leading "-", NA inputs give NA_character_, and hours are not wrapped at 24,
# so a 25-hour gap is rendered as "25:00:00".
diff_to_hms <- function(x) {
  missing <- is.na(x)
  secs <- abs(x)
  # Use a dummy value for missing entries so sprintf() never sees NA; the
  # affected results are overwritten with NA below.
  secs[missing] <- 0
  sign <- ifelse(!missing & x < 0, "-", "")
  out <- sprintf(
    "%s%02d:%02d:%02d",
    sign,
    secs %/% 3600,        # total hours, so multi-day gaps keep their days
    secs %% 3600 %/% 60,  # minutes within the hour
    secs %% 60 %/% 1      # whole seconds within the minute
  )
  out[missing] <- NA_character_
  out
}
# Combine the business date with each time of day so that gaps crossing
# midnight are measured on full timestamps, then compute, for every row, the
# next row's entry time minus this row's exit time (in seconds).
dat %>%
  mutate(
    ENTRY_TM = as.POSIXct(
      strptime(paste(BIZ_DT, ENTRY_TM), format = "%Y-%m-%d %H:%M:%S")
    ),
    EXIT_TM = as.POSIXct(
      strptime(paste(BIZ_DT, EXIT_TM), format = "%Y-%m-%d %H:%M:%S")
    )
  ) %>%
  mutate(
    # lead() shifts ENTRY_TM up by one row; the last row has no successor
    # and therefore gets NA.
    Stay_Duration = as.numeric(
      difftime(lead(ENTRY_TM), EXIT_TM, units = "secs")
    ),
    Stay_Duration2 = diff_to_hms(Stay_Duration)
  )
答案 1 :(得分:1)
试试这个:
library(data.table)
library(Hmisc) # used here for Cs() and Lag()
setDT(df)
setnames(df, Cs(ID, da, en, ex))
# Attach the date to each time of day so that gaps across midnight are
# measured on full timestamps.
df[, en1 := as.POSIXct(paste(da, en))]
df[, ex1 := as.POSIXct(paste(da, ex))]
# Lag() with a negative shift acts as a lead: each row receives the entry
# time of the following row (the last row gets NA).
df[, s := Lag(en1, -1)]
# Pin the unit to minutes. Plain subtraction lets difftime pick the unit from
# the data, which would break any later step that assumes minutes.
df[, Stay_Duration := difftime(s, ex1, units = "mins")]
df[, Cs(en1, ex1, s) := NULL]
df
# ID da en ex Stay_Duration
# 1: 115492027250 2017-01-01 06:45:55 06:54:50 39.766667 mins
# 2: 115492027250 2017-01-01 07:34:36 07:43:21 278.500000 mins
# 3: 115523231209 2017-01-01 12:21:51 13:17:38 19.033333 mins
# 4: 115523231209 2017-01-01 13:36:40 13:46:19 950.916667 mins
# 5: 115526742250 2017-01-02 05:37:14 05:43:02 11.633333 mins
# 6: 115526742250 2017-01-02 05:54:40 06:36:28 592.866667 mins
# 7: 115509240124 2017-01-02 16:29:20 17:26:50 3.566667 mins
# 8: 115509240124 2017-01-02 17:30:24 17:36:37 716.216667 mins
# 9: 115539253765 2017-01-03 05:32:50 05:40:51 1.600000 mins
# 10: 115539253765 2017-01-03 05:42:27 06:24:58 631.483333 mins
# 11: 115570245886 2017-01-03 16:56:27 18:23:17 199.133333 mins
# 12: 115567046025 2017-01-03 21:42:25 22:15:05 467.683333 mins
# 13: 115562452408 2017-01-04 06:02:46 06:32:15 8.283333 mins
# 14: 115562452408 2017-01-04 06:40:32 08:17:58 534.283333 mins
# 15: 115574565032 2017-01-04 17:12:15 18:14:35 1.833333 mins
# 16: 115574565032 2017-01-04 18:16:25 18:25:18 NA mins
# Render the stay duration as "HH:MM:SS" text.
# The seconds are obtained from the difftime's own unit instead of assuming
# minutes, and rounded to whole seconds so that floating-point error from the
# unit conversion cannot drop a second when the value is formatted.
# NOTE(review): formatting through a clock time wraps at 24 hours, so a stay of
# a day or more would lose its whole days.
df[, stay2 := strftime(
  as.POSIXct(
    round(as.numeric(Stay_Duration, units = "secs")),
    origin = as.POSIXct("00:00:00", format = "%H:%M:%S", tz = "GMT"),
    tz = "GMT"),
  format = "%H:%M:%S", tz = "GMT")]
df
# ID da en ex Stay_Duration stay2
# 1: 115492027250 2017-01-01 06:45:55 06:54:50 39.766667 mins 00:39:46
# 2: 115492027250 2017-01-01 07:34:36 07:43:21 278.500000 mins 04:38:30
# 3: 115523231209 2017-01-01 12:21:51 13:17:38 19.033333 mins 00:19:02
# 4: 115523231209 2017-01-01 13:36:40 13:46:19 950.916667 mins 15:50:55
# 5: 115526742250 2017-01-02 05:37:14 05:43:02 11.633333 mins 00:11:38
# 6: 115526742250 2017-01-02 05:54:40 06:36:28 592.866667 mins 09:52:52
# 7: 115509240124 2017-01-02 16:29:20 17:26:50 3.566667 mins 00:03:34
# 8: 115509240124 2017-01-02 17:30:24 17:36:37 716.216667 mins 11:56:13
# 9: 115539253765 2017-01-03 05:32:50 05:40:51 1.600000 mins 00:01:36
# 10: 115539253765 2017-01-03 05:42:27 06:24:58 631.483333 mins 10:31:29
# 11: 115570245886 2017-01-03 16:56:27 18:23:17 199.133333 mins 03:19:08
# 12: 115567046025 2017-01-03 21:42:25 22:15:05 467.683333 mins 07:47:41
# 13: 115562452408 2017-01-04 06:02:46 06:32:15 8.283333 mins 00:08:17
# 14: 115562452408 2017-01-04 06:40:32 08:17:58 534.283333 mins 08:54:17
# 15: 115574565032 2017-01-04 17:12:15 18:14:35 1.833333 mins 00:01:50
# 16: 115574565032 2017-01-04 18:16:25 18:25:18 NA mins NA
答案 2 :(得分:1)
您需要让时间带上日期信息(即转换为完整的日期时间),并将进入时间列上移一行,使其与对应的退出时间对齐。例如:
library(dplyr) # Needed for lead function
# Consider 'x' as your DF
# Build full timestamps by pasting the business date onto each time of day.
x[["entDate"]] <- as.POSIXct(paste(x[["BIZ_DT"]], x[["ENTRY_TM"]]))
x[["extDate"]] <- as.POSIXct(paste(x[["BIZ_DT"]], x[["EXIT_TM"]]))
# Stay duration in seconds: the next row's entry minus this row's exit.
# Other units such as "mins" work as well.
next_entry <- lead(x[["entDate"]], 1)
x[["Stay_Duration"]] <- as.numeric(next_entry - x[["extDate"]], units = "secs")
# JRNY_ID_NUM BIZ_DT ENTRY_TM EXIT_TM entDate extDate Stay_Duration
# 160 115492027250 2017-01-01 06:45:55 06:54:50 2017-01-01 06:45:55 2017-01-01 06:54:50 2386
# 73 115492027250 2017-01-01 07:34:36 07:43:21 2017-01-01 07:34:36 2017-01-01 07:43:21 16710
# 51 115523231209 2017-01-01 12:21:51 13:17:38 2017-01-01 12:21:51 2017-01-01 13:17:38 1142
# 145 115523231209 2017-01-01 13:36:40 13:46:19 2017-01-01 13:36:40 2017-01-01 13:46:19 57055
# 111 115526742250 2017-01-02 05:37:14 05:43:02 2017-01-02 05:37:14 2017-01-02 05:43:02 698
# 56 115526742250 2017-01-02 05:54:40 06:36:28 2017-01-02 05:54:40 2017-01-02 06:36:28 35572
# 119 115509240124 2017-01-02 16:29:20 17:26:50 2017-01-02 16:29:20 2017-01-02 17:26:50 214
# 157 115509240124 2017-01-02 17:30:24 17:36:37 2017-01-02 17:30:24 2017-01-02 17:36:37 42973
# 168 115539253765 2017-01-03 05:32:50 05:40:51 2017-01-03 05:32:50 2017-01-03 05:40:51 96
# 131 115539253765 2017-01-03 05:42:27 06:24:58 2017-01-03 05:42:27 2017-01-03 06:24:58 37889
# 81 115570245886 2017-01-03 16:56:27 18:23:17 2017-01-03 16:56:27 2017-01-03 18:23:17 11948
# 78 115567046025 2017-01-03 21:42:25 22:15:05 2017-01-03 21:42:25 2017-01-03 22:15:05 28061
# 135 115562452408 2017-01-04 06:02:46 06:32:15 2017-01-04 06:02:46 2017-01-04 06:32:15 497
# 35 115562452408 2017-01-04 06:40:32 08:17:58 2017-01-04 06:40:32 2017-01-04 08:17:58 32057
# 165 115574565032 2017-01-04 17:12:15 18:14:35 2017-01-04 17:12:15 2017-01-04 18:14:35 110
# 25 115574565032 2017-01-04 18:16:25 18:25:18 2017-01-04 18:16:25 2017-01-04 18:25:18 NA
但请注意,如果同一行中的进入和退出发生在不同的日期,此解决方案将不适用(这也是我发表上述评论的原因)。
答案 3 :(得分:1)
这是一个朴素的基础 R 方案(之所以说朴素,是因为它不考虑日期,只使用时间):
# Expected result from the question, kept as a reference table for checking.
# The last row has no following entry, so its Stay_Duration holds a
# placeholder string instead of a time.
desired_output <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text =
"JRNY_ID_NUM BIZ_DT ENTRY_TM EXIT_TM Stay_Duration
115492027250 1/1/2017 6:45:55 6:54:50 0:39:46
115492027250 1/1/2017 7:34:36 7:43:21 4:38:30
115523231209 1/1/2017 12:21:51 13:17:38 0:19:02
115523231209 1/1/2017 13:36:40 13:46:19 15:50:55
115526742250 1/2/2017 5:37:14 5:43:02 0:11:38
115526742250 1/2/2017 5:54:40 6:36:28 9:52:52
115509240124 1/2/2017 16:29:20 17:26:50 0:03:34
115509240124 1/2/2017 17:30:24 17:36:37 11:56:13
115539253765 1/3/2017 5:32:50 5:40:51 0:01:36
115539253765 1/3/2017 5:42:27 6:24:58 10:31:29
115570245886 1/3/2017 16:56:27 18:23:17 3:19:08
115567046025 1/3/2017 21:42:25 22:15:05 7:47:41
115562452408 1/4/2017 6:02:46 6:32:15 0:08:17
115562452408 1/4/2017 6:40:32 8:17:58 8:54:17
115574565032 1/4/2017 17:12:15 18:14:35 0:01:50
115574565032 1/4/2017 18:16:25 18:25:18 place_holder"
)
# Consistency check: parse the expected "H:M:S" strings into durations in
# seconds and compare them with the computed Stay_Duration column of `res`.
# NOTE(review): the non-time placeholder in the last row presumably parses to
# NA, which would have to match an NA in `res` - confirm.
all.equal(
  target = as.difftime(
    desired_output$Stay_Duration,
    format = "%H:%M:%S",
    units = "secs"
  ),
  current = res$Stay_Duration
)
# [1] TRUE
它只是比较时间,并应用模运算。
正如其他人已经说过的那样,如果日期存在差距,您可能会遇到各种各样的问题。
使用所需输出进行一致性检查:
count($_FILES)