我有一个日期时间的数据框,如下所示:
library(lubridate)

# Half-hourly timestamps from the first to the last slot of April 2016,
# stored as the single column of a data frame.
date_seq <- seq(
  from = ymd_hm("2016-04-01 0:00"),
  to = ymd_hm("2016-04-30 23:30"),
  by = "30 mins"
)
datetimes <- data.frame(datetime = date_seq)
我还有一个数据框,其中包含开放时间,指定开放时间适用的天数范围,以及商店在日期范围内开放的小时范围,如下所示:
# Opening hours: each row gives a date range (from_date .. till_date) and the
# daily hours (from_time .. till_time) that apply within that range.
# stringsAsFactors = FALSE keeps every column a character vector on all R
# versions (before R 4.0 data.frame() converted strings to factors by default).
opening_times <- data.frame(
  from_date = c("2016-03-01", "2016-04-15"),
  till_date = c("2016-04-15", "2016-05-20"),
  from_time = c("11:00", "10:30"),
  till_time = c("22:00", "23:00"),
  stringsAsFactors = FALSE
)
我想要的是在 datetimes 中标记处于开放时间内的那些行。也就是说,只要某一行的日期时间在 from_date 和 till_date 之间,并且在 from_time 和 till_time 之间,我就想要一个值为 TRUE 的列。
答案 0 :(得分:1)
如果数据集不是太大,我建议您基于 opening_times 创建一个新数据集,把每个日期范围展开成逐日的行:
# Parse the range boundaries into Date objects so they can be expanded.
opening_times$from_date <- as.Date(opening_times$from_date, '%Y-%m-%d')
opening_times$till_date <- as.Date(opening_times$till_date, '%Y-%m-%d')

# Build one row per calendar day covered by each row of opening_times, carrying
# that row's from_time / till_time along. The day column is explicitly named
# `date` so the result can be merged with other data by date. A day that lies
# in more than one range appears once per range.
opening_times2 <- do.call(
  rbind,
  lapply(
    # seq_len() yields an empty sequence for a zero-row data frame, whereas
    # seq(0) would yield c(1, 0) and index rows that do not exist.
    seq_len(nrow(opening_times)),
    function(rownumber) {
      data.frame(
        date = seq.Date(
          from = opening_times[rownumber, 'from_date'],
          to = opening_times[rownumber, 'till_date'],
          by = 1
        ),
        from_time = opening_times[rownumber, 'from_time'],
        till_time = opening_times[rownumber, 'till_time']
      )
    }
  )
)
然后按日期将其与 datetimes 合并,并检查每行的时间是否介于 from_time 和 till_time 之间。
答案 1 :(得分:1)
lubridate 有一个 %within% 函数,用于检查某个时间是否落在 lubridate::interval 之内;先创建一个区间向量,再用它就可以轻松实现这一点:
# Make a sequence of days for each row of opening_times.
# mapply(SIMPLIFY = FALSE) always returns a list; apply() would instead collapse
# the result into a matrix whenever every range spans the same number of days.
# USE.NAMES = FALSE keeps the list unnamed so do.call(c, ...) below receives
# plain positional arguments.
open_intervals <- mapply(
  function(from, to) seq(ymd(from), ymd(to), by = 'day'),
  as.character(opening_times$from_date),
  as.character(opening_times$till_date),
  SIMPLIFY = FALSE,
  USE.NAMES = FALSE
)
# Turn each date into a lubridate::interval object with the appropriate times.
open_intervals <- mapply(
  function(dates, from, to) {
    interval(ymd_hm(paste(dates, from)), ymd_hm(paste(dates, to)))
  },
  open_intervals,
  opening_times$from_time,
  opening_times$till_time,
  SIMPLIFY = FALSE,
  USE.NAMES = FALSE
)
# Combine list items into one vector of intervals.
open_intervals <- do.call(c, open_intervals)
# Use lubridate::%within% to check if each datetime is in any open interval.
# vapply() guarantees a logical vector with one value per row.
datetimes$open <- vapply(
  datetimes$datetime,
  function(x) any(x %within% open_intervals),
  logical(1)
)
datetimes[20:26,]
#               datetime  open
# 20 2016-04-01 09:30:00 FALSE
# 21 2016-04-01 10:00:00 FALSE
# 22 2016-04-01 10:30:00 FALSE
# 23 2016-04-01 11:00:00 TRUE
# 24 2016-04-01 11:30:00 TRUE
# 25 2016-04-01 12:00:00 TRUE
# 26 2016-04-01 12:30:00 TRUE
如果你只有两组营业时间,可以把整个逻辑压缩成一个(有点庞大的)ifelse:
# Rows whose date falls in the first date range are tested against the first
# pair of hours; every other row is tested against the second pair.
datetimes$open <- local({
  # Time of day of every row, and the opening/closing times, parsed once.
  clock <- hm(format(datetimes$datetime, '%H:%M'))
  opens <- hm(opening_times$from_time)
  closes <- hm(opening_times$till_time)
  in_first_range <- as.Date(datetimes$datetime) %within%
    interval(opening_times$from_date[1], opening_times$till_date[1])
  ifelse(
    in_first_range,
    clock >= opens[1] & clock <= closes[1],
    clock >= opens[2] & clock <= closes[2]
  )
})
或
# Same selection as a single ifelse(): rows dated within the first date range
# are checked against the first pair of hours, all other rows against the
# second pair. The hours are anchored to each row's own calendar day.
datetimes$open <- local({
  day <- as.Date(datetimes$datetime)
  # Interval running from `from` to `to` (clock times) on each row's day.
  hours_on_day <- function(from, to) {
    interval(ymd_hm(paste(day, from)), ymd_hm(paste(day, to)))
  }
  ifelse(
    day %within% interval(opening_times$from_date[1],
                          opening_times$till_date[1]),
    datetimes$datetime %within%
      hours_on_day(opening_times$from_time[1], opening_times$till_time[1]),
    datetimes$datetime %within%
      hours_on_day(opening_times$from_time[2], opening_times$till_time[2])
  )
})