我正在尝试在DateTime数据库中添加一个新变量,当它不拦截“ 08:00:00” /“ 20:00:00”时,可以分配“ day”和“ night”,但是当它截取了我要根据在08:00-20:00(白天)或20:00-08:00(夜晚)之外花费的最长时间分配“白天”或“晚上”这两个时间点。
#Current input
pacman::p_load(pacman,lubridate,chron)
id<-c("m1","m1","m1","m2","m2","m2","m3","m4","m4")
x<-c("1998-01-03 10:00:00","1998-01-03 16:00:00","1998-01-03 19:20:00","1998-01-04 00:50:00","1998-01-06 11:20:00","1998-01-06 20:50:00","1998-01-06 22:00:00","1998-01-07 06:30:00","1998-01-07 07:50:00")
start<-as.POSIXct(x,"%Y-%m-%d %H:%M:%S",tz="UTC")
y<-c("1998-01-03 16:00:00","1998-01-03 19:20:00","1998-01-04 00:50:00","1998-01-06 11:20:00","1998-01-06 20:50:00","1998-01-06 22:00:00","1998-01-07 07:40:00","1998-01-07 07:50:00","1998-01-07 08:55:00")
end<-as.POSIXct(y,"%Y-%m-%d %H:%M:%S",tz="UTC")
mydata<-data.frame(id,start,end)
#Current output
df1 <- mydata %>%
mutate(start1 = as.POSIXct(sub("\\d+-\\d+-\\d+", Sys.Date(), start)),
end1 = as.POSIXct(sub("\\d+-\\d+-\\d+", Sys.Date(), end)),
day.night = case_when(start1 >= as.POSIXct('08:00:00', format = "%T") &
end1 >= as.POSIXct('08:00:00', format = "%T") &
end1 < as.POSIXct('20:00:00', format = "%T") ~ "day",
start1 >= as.POSIXct('20:00:00', format = "%T") &
(start1 < as.POSIXct('08:00:00', format = "%T") | end1 < as.POSIXct('23:00:00', format = "%T"))|
(start1 < as.POSIXct('08:00:00', format = "%T") & end1 < as.POSIXct('08:00:00', format = "%T")) ~ "night",
difftime(as.POSIXct('20:00:00', format = "%T"), start1) > difftime(end1, as.POSIXct('20:00:00', format = "%T")) ~ "day",
difftime(as.POSIXct('20:00:00', format = "%T"), start1) < difftime(end1, as.POSIXct('20:00:00', format = "%T")) ~ "night",
TRUE ~ "mixed"))
当前输出错误分配了任何截取08:00-20:00的时间段
即第3行应该=“ night”,因为4hrs50mins是“ night”,而40 mins是“ day”
第4行应=“ night”,因为31hrs50mins是“ night”,而28hrs20mins是“ day”
#Current table
id start end start1 end1 day.night
1 m1 1998-01-03 10:00:00 1998-01-03 16:00:00 2019-09-03 10:00:00 2019-09-03 16:00:00 day
2 m1 1998-01-03 16:00:00 1998-01-03 19:20:00 2019-09-03 16:00:00 2019-09-03 19:20:00 day
3 m1 1998-01-03 19:20:00 1998-01-04 00:50:00 2019-09-03 19:20:00 2019-09-03 00:50:00 day
4 m2 1998-01-04 00:50:00 1998-01-06 11:20:00 2019-09-03 00:50:00 2019-09-03 11:20:00 day
5 m2 1998-01-06 11:20:00 1998-01-06 20:50:00 2019-09-03 11:20:00 2019-09-03 20:50:00 day
6 m2 1998-01-06 20:50:00 1998-01-06 22:00:00 2019-09-03 20:50:00 2019-09-03 22:00:00 night
7 m3 1998-01-06 22:00:00 1998-01-07 07:40:00 2019-09-03 22:00:00 2019-09-03 07:40:00 night
8 m4 1998-01-07 06:30:00 1998-01-07 07:50:00 2019-09-03 06:30:00 2019-09-03 07:50:00 night
9 m4 1998-01-07 07:50:00 1998-01-07 08:55:00 2019-09-03 07:50:00 2019-09-03 08:55:00 day
答案 0 :(得分:2)
library(dplyr)
library(lubridate)
library(chron)
id<-c("m1","m1","m1","m2","m2","m2","m3","m4","m4")
x<-c("1998-01-03 10:00:00","1998-01-03 16:00:00","1998-01-03 19:20:00","1998-01-04 00:50:00","1998-01-06 11:20:00","1998-01-06 20:50:00","1998-01-06 22:00:00","1998-01-07 06:30:00","1998-01-07 07:50:00")
start<-as.POSIXct(x,"%Y-%m-%d %H:%M:%S",tz="UTC")
y<-c("1998-01-03 16:00:00","1998-01-03 19:20:00","1998-01-04 00:50:00","1998-01-06 11:20:00","1998-01-06 20:50:00","1998-01-06 22:00:00","1998-01-07 07:40:00","1998-01-07 07:50:00","1998-01-07 08:55:00")
end<-as.POSIXct(y,"%Y-%m-%d %H:%M:%S",tz="UTC")
mydata<-data.frame(id,start,end)
#Current output
df1 <- mydata %>%
mutate(i = interval(start, end),
total_interval_length = time_length(i, unit = "hour")) %>%
# Calculate daytime hours on first and last days
mutate(first_day = floor_date(start, unit = "day"),
last_day = floor_date(end, unit = "day")) %>%
mutate(first_day_daytime =
interval(update(first_day, hour = 8), update(first_day, hour = 20)),
last_day_daytime =
interval(update(last_day, hour = 8), update(last_day, hour = 20))) %>%
mutate(first_day_overlap =
coalesce(as.numeric(as.duration(intersect(first_day_daytime, i)), "hour"),0),
last_day_overlap =
coalesce(as.numeric(as.duration(intersect(last_day_daytime, i)), "hour"),0)
) %>%
# Calculate total daytime hours
# For rows of one date only, that is just first_day_overlap (or last_day_overlap since it's the same day)
# For rows in multiple dates, it's the first_day_overlap plus last_day_overlap plus 12 hours for each day in between
mutate(daytime_length =
ifelse(first_day == last_day,
first_day_overlap,
first_day_overlap + last_day_overlap +
12*(as.numeric(as.duration(interval(first_day, last_day)), "day")-1))
) %>%
# Assign day or night classification
mutate(day_night = ifelse(daytime_length >= total_interval_length - daytime_length, "day", "night"))