合并日期和小时范围

时间:2016-04-27 17:30:17

标签: r datetime merge

我有一个日期时间的数据框,如下所示:

library(lubridate)

# Half-hourly timestamps from the first to the last half hour of April 2016.
# seq() dispatches to the POSIXt method because its first argument is a
# date-time object.
date_seq <- seq(
  from = ymd_hm('2016-04-01 0:00'),
  to = ymd_hm('2016-04-30 23:30'),
  by = '30 mins'
)

# One row per timestamp, held in a single `datetime` column.
datetimes <- data.frame(datetime = date_seq)

我还有一个数据框,其中包含开放时间,指定开放时间适用的天数范围,以及商店在日期范围内开放的小时范围,如下所示:

# Opening-time rules, one per row: the inclusive date range a rule applies to
# (from_date .. till_date) and the daily opening hours within that range
# (from_time .. till_time). All four columns are supplied as text.
opening_times <- data.frame(
  from_date = c('2016-03-01', '2016-04-15'),
  till_date = c('2016-04-15', '2016-05-20'),
  from_time = c('11:00', '10:30'),
  till_time = c('22:00', '23:00')
)

我想要的是在 datetimes 中标记出处于开放时间内的那些行。也就是说,我想要一个新列:只要该行的日期时间落在 from_date 和 till_date 之间,并且时间落在 from_time 和 till_time 之间,这一列的值就为 TRUE。

2 个答案:

答案 0 :(得分:1)

如果数据集不是太大,我建议您从opening_times创建一个新数据集 -

# Parse the textual date bounds into Date objects so the days between them
# can be enumerated.
opening_times$from_date <- as.Date(opening_times$from_date, format = '%Y-%m-%d')
opening_times$till_date <- as.Date(opening_times$till_date, format = '%Y-%m-%d')

# Expand every opening-time rule into one row per calendar day it covers,
# carrying that rule's opening and closing time along. A day covered by more
# than one rule produces one row per rule.
opening_times2 <- do.call(
  rbind,
  lapply(
    # seq_len() yields an empty sequence for a zero-row table, whereas
    # seq(nrow(x)) would yield c(1, 0).
    seq_len(nrow(opening_times)),
    function(rownumber) {
      data.frame(
        # Name the day column explicitly so it can serve as the merge key;
        # left unnamed, data.frame() derives a name from the expression text.
        date = seq(
          from = opening_times[rownumber, 'from_date'],
          to = opening_times[rownumber, 'till_date'],
          by = 1
        ),
        from_time = opening_times[rownumber, 'from_time'],
        till_time = opening_times[rownumber, 'till_time']
      )
    }
  )
)

然后将其与日期时间按日期合并,并检查时间是否介于两个值之间。

答案 1 :(得分:1)

lubridate 提供了 %within% 函数,用于检查某个时间是否落在 lubridate::interval 之内;先创建一个间隔向量,之后就可以很容易地完成这项检查:

# For every rule in opening_times, build one lubridate interval per calendar
# day the rule covers, running from that day's opening time to its closing
# time. Iterating over row indices with lapply() always yields a list;
# apply()/mapply() can simplify their result to a plain vector or matrix when
# every rule spans the same number of days (or when there is only one rule),
# which would break the steps below.
open_intervals <- lapply(seq_len(nrow(opening_times)), function(i) {
  # as.character() hands ymd() plain text regardless of how the date
  # columns are currently stored.
  days <- seq(
    ymd(as.character(opening_times$from_date[i])),
    ymd(as.character(opening_times$till_date[i])),
    by = 'day'
  )
  interval(
    ymd_hm(paste(days, opening_times$from_time[i])),
    ymd_hm(paste(days, opening_times$till_time[i]))
  )
})

# combine list items into one vector of intervals
open_intervals <- do.call(c, open_intervals)

# use lubridate::%within% to check if each datetime is in any open interval;
# vapply() pins the result to exactly one logical value per datetime
datetimes$open <- vapply(datetimes$datetime, function(x) {
  any(x %within% open_intervals)
}, logical(1))

datetimes[20:26,]
#               datetime  open
# 20 2016-04-01 09:30:00 FALSE
# 21 2016-04-01 10:00:00 FALSE
# 22 2016-04-01 10:30:00 FALSE
# 23 2016-04-01 11:00:00  TRUE
# 24 2016-04-01 11:30:00  TRUE
# 25 2016-04-01 12:00:00  TRUE
# 26 2016-04-01 12:30:00  TRUE

修改

如果你只有两组开放时间,可以把整个逻辑压缩成一个(有点庞大的)ifelse。下面给出两种写法:

# Flag each datetime as open/closed using exactly two opening-time rules.
# Times of day are compared as lubridate periods. local() keeps the helper
# values out of the calling environment.
datetimes$open <- local({
  day <- as.Date(datetimes$datetime)
  # Time-of-day component of every datetime, computed once and reused.
  time_of_day <- hm(format(datetimes$datetime, '%H:%M'))
  opens <- hm(opening_times$from_time)
  closes <- hm(opening_times$till_time)

  # Does the calendar day fall inside rule i's date range?
  in_range <- function(i) {
    day %within% interval(opening_times$from_date[i],
                          opening_times$till_date[i])
  }
  # Does the time of day fall inside rule i's opening hours (inclusive)?
  in_hours <- function(i) {
    time_of_day >= opens[i] & time_of_day <= closes[i]
  }

  # Rule 1 decides for days inside its date range. Every other day is open
  # only if it lies inside rule 2's date range AND rule 2's hours, so a
  # datetime outside both ranges is never flagged as open.
  ifelse(in_range(1), in_hours(1), in_range(2) & in_hours(2))
})

# Flag each datetime as open/closed using exactly two opening-time rules.
# Opening hours are checked with per-day lubridate intervals. local() keeps
# the helper values out of the calling environment.
datetimes$open <- local({
  # Calendar day of every datetime, computed once and reused.
  day <- as.Date(datetimes$datetime)

  # Does the calendar day fall inside rule i's date range?
  in_range <- function(i) {
    day %within% interval(opening_times$from_date[i],
                          opening_times$till_date[i])
  }
  # Does the datetime fall between rule i's opening and closing time on its
  # own calendar day?
  in_hours <- function(i) {
    datetimes$datetime %within%
      interval(ymd_hm(paste(day, opening_times$from_time[i])),
               ymd_hm(paste(day, opening_times$till_time[i])))
  }

  # Rule 1 decides for days inside its date range. Every other day is open
  # only if it lies inside rule 2's date range AND rule 2's hours, so a
  # datetime outside both ranges is never flagged as open.
  ifelse(in_range(1), in_hours(1), in_range(2) & in_hours(2))
})