我有一个数据框如下
timedf <- data.frame(spaceNum=c(1,1,1,2,2,2), starttime= c("2015-09-03 00:00",
"2015-09-04 23:18", "2015-09-05 05:59", "2015-09-03 06:19",
"2015-09-06 09:03", "2015-09-06 09:10"), endtime = c("2015-09-04 20:05", "2015-09-05 05:52",
"2015-09-05 06:15", "2015-09-05 16:36",
"2015-09-06 09:06", "2015-09-06 20:42"))
timedf$staytime <- round(as.numeric(difftime(as.POSIXct(timedf$endtime), as.POSIXct(timedf$starttime),units="hours")), digits=2)
timedf
spaceNum starttime endtime staytime
1 1 2015-09-03 00:00 2015-09-04 20:05 44.08
2 1 2015-09-04 23:18 2015-09-05 05:52 6.57
3 1 2015-09-05 05:59 2015-09-05 06:15 0.27
4 2 2015-09-03 06:19 2015-09-05 16:36 58.28
5 2 2015-09-06 09:03 2015-09-06 09:06 0.05
6 2 2015-09-06 09:10 2015-09-06 20:42 11.53
我想分别计算0To7,7To19和19To24的时间段,即我希望得到低于结果
spaceNum starttime endtime staytime 0To7 7To19 19To24
1 1 2015-09-03 00:00 2015-09-04 20:05 44.08 14 24 6.08
2 1 2015-09-04 23:18 2015-09-05 05:52 6.57 5.87 0 0.7
3 1 2015-09-05 05:59 2015-09-05 06:15 0.27 0.27 0 0
4 2 2015-09-03 06:19 2015-09-05 16:36 58.28 14.683 33.6 10
5 2 2015-09-06 09:03 2015-09-06 09:06 0.05 0 0.05 0
6 2 2015-09-06 09:10 2015-09-06 20:42 11.53 0 9.83 1.7
请评论我该如何做到这一点,我正在寻找具有强大时间功能的lubridate
,但我仍然无法得到答案。
答案 0 :(得分:4)
IMO如果你使用data.tables,那么整个过程会更有效率。
# your data as provided in the question
timedf <- data.frame(spaceNum=c(1,1,1,2,2,2),
starttime= c("2015-09-03 00:00","2015-09-04 23:18", "2015-09-05 05:59", "2015-09-03 06:19","2015-09-06 09:03", "2015-09-06 09:10"),
endtime = c("2015-09-04 20:05", "2015-09-05 05:52", "2015-09-05 06:15", "2015-09-05 16:36","2015-09-06 09:06", "2015-09-06 20:42"))
library(data.table)
# convert starttime and endtime to POSIXct - note that you need tz="GMT"
setDT(timedf)[,c("starttime","endtime"):=lapply(.SD, as.POSIXct, tz="GMT"), .SDcols=2:3]
# add staytime column
timedf[,staytime:=round(as.numeric(difftime(endtime,starttime,units="hours")),digits=2)]
# this does the main work
brks <- c(0,7*3600,19*3600,24*3600)
lbls <- c("0To7","7To19","19To24")
sec.per.day <- 24*60*60
get.cuts <- function(x,y) as.list(table(cut((x:y)%%sec.per.day, breaks=brks, labels=lbls))/3600)
timedf[,c(lbls):=get.cuts(starttime,endtime), by=1:nrow(timedf)]
timedf
# spaceNum starttime endtime staytime 0To7 7To19 19To24
# 1: 1 2015-09-03 00:00:00 2015-09-04 20:05:00 44.08 14.0000000 24.00000000 6.083056
# 2: 1 2015-09-04 23:18:00 2015-09-05 05:52:00 6.57 5.8666667 0.00000000 0.700000
# 3: 1 2015-09-05 05:59:00 2015-09-05 06:15:00 0.27 0.2669444 0.00000000 0.000000
# 4: 2 2015-09-03 06:19:00 2015-09-05 16:36:00 58.28 14.6836111 33.60000000 9.999444
# 5: 2 2015-09-06 09:03:00 2015-09-06 09:06:00 0.05 0.0000000 0.05027778 0.000000
# 6: 2 2015-09-06 09:10:00 2015-09-06 20:42:00 11.53 0.0000000 9.83361111 1.700000
这种方法利用了POSIXct变量只是自1970-01-01以来的秒数这一事实。 get.cuts(...)
函数生成一个来自starttime:endtime
的整数向量,然后将每个元素模数为24 * 60 * 60,它将一天中的秒数除去并留下余数。然后我们使用cut(...)
根据范围00:00:00 - 07:00:00,07:00:01 - 19:00:00和19:00:01将每个元素分配到一个bin中 - 24:00:00,我们使用table(...)
来计算每个bin中的秒数。然后我们通过除以3600转换为小时并将结果作为列表返回(这是data.table所必需的)。
最后,我们为data.table中的每一行调用get.cuts(...)
。
如您所见,代码短于解释...
答案 1 :(得分:1)
最后我为此编写了一个函数,我知道函数很难看 而不是优化,但这是目前我可以获得的结果,如果有人能够提供更简单的解决方案,那就太棒了。
getp1TOp2 <- function(x,y,p1,p2) {
#Calcuate the numofdays
numdays <- as.numeric(difftime( update(as.POSIXct(y), hour=0, min=0, sec=0), update(as.POSIXct(x), hour=0, min=0, sec=0),units = "days"))
#Calculate the FirstdayStart, FirstdayEnd, LastdayStart and LastdayEnd
FirstdayStart <- update(as.POSIXct(x), hour=p1, min=0, sec=0)
FirstdayEnd <- update(as.POSIXct(x), hour=p2, min=0, sec=0)
LastdayStart <- update(as.POSIXct(y), hour=p1, min=0, sec=0)
LastdayEnd <- update(as.POSIXct(y), hour=p2, min=0, sec=0)
# For case Start and End in the same day , eg (2015-09-07 03:00, 2015-09-07 06:00)
if (numdays==0)
{
#For case StartTime >= FirstdayStart
if (as.POSIXct(x) >= FirstdayStart)
{
#For case EndTime < FirstdayEnd
if (as.POSIXct(y) <= FirstdayEnd)
{
#Example fall into this case (2015-09-07 08:00-2015-09-07 18:32), just return diff of start/end time
outval<- as.numeric(difftime(as.POSIXct(y), as.POSIXct(x), units="hours"))
}
else
{
#Example fall into this case (2015-09-07 08:00-2015-09-07 20:11 ), return diff of start/ FirstdayEnd time
outval<- as.numeric(difftime(FirstdayEnd, as.POSIXct(x), units="hours"))
}
}
else #For case StartTime < FirstdayStart
{
#For case EndTime < FirstdayEnd
if (as.POSIXct(y) <= FirstdayEnd)
{
#Example fall into this case (2015-09-07 06:00-2015-09-07 18:32), just return diff of FirstdayStart/end time
outval<- as.numeric(difftime(as.POSIXct(y), FirstdayStart, units="hours"))
}
else
{
#Example fall into this case (2015-09-07 06:00-2015-09-07 20:11 ), return diff of FirstdayStart/ FirstdayEnd time
outval<- as.numeric(difftime(FirstdayEnd, FirstdayStart, units="hours"))
}
}
}
else # For case Start and End not in the same day , eg (2015-09-07 03:00", 2015-09-07 06:00)
{
# For case starttime < FirstdayStart Calculate the Firstday period first, 2015-09-03 02:00 2015-09-04 16:00
if (as.POSIXct(x)< FirstdayStart )
{
#Example fall into this case (2015-09-07 06:00 2015-09-08 20:11 ), firstdayPeriod return diff of FirstdayStart/ FirstdayEnd time
firstdayPeriod <- as.numeric(difftime(FirstdayEnd, FirstdayStart, units="hours"))
}
else
{
if (as.POSIXct(x) <= FirstdayEnd)
{
#Example fall into this case (2015-09-07 08:00 2015-09-08 20:11 ), firstdayPeriod = of Startime/ FirstdayEnd time
firstdayPeriod <- as.numeric(difftime(FirstdayEnd, as.POSIXct(x), units="hours"))
}
else
{ #Example fall into this case (2015-09-07 20:00 2015-09-08 20:11 ), firstdayPeriod=0
firstdayPeriod <- c(0)
}
}
#Calculate the last day period
if (as.POSIXct(y) > LastdayEnd)
{
#Example fall into this case (2015-09-07 08:00 2015-09-08 21:00 ), lastdayPeriod = of LastdayEnd/ LastdayStart time
lastdayPeriod <- as.numeric(difftime(FirstdayEnd, FirstdayStart, units="hours"))
}
else
{
if (as.POSIXct(y) >= LastdayStart)
{
#Example fall into this case (2015-09-07 08:00 2015-09-08 18:00 ), lastdayPeriod = of LastdayEnd/ endtime time
lastdayPeriod <- as.numeric(difftime(as.POSIXct(y),LastdayStart, units="hours"))
}
else
{
lastdayPeriod <- c(0)
}
}
#Calculate the overrall time
outval <- lastdayPeriod +firstdayPeriod + (numdays-1)* as.numeric(difftime(FirstdayEnd,FirstdayStart, units="hours"))
}
outval <- round(outval, digits=2)
if (outval < 0)
{
outval <- c(0)
}
outval
}
我得到如下结果
timedf$P0To7 <- apply(timedf[,c(2,3)], 1, function(x) getp1TOp2(x[1], x[2], 0, 7))
timedf$P7To19 <- apply(timedf[,c(2,3)], 1, function(x) getp1TOp2(x[1], x[2], 7, 19))
timedf$P19To24 <- apply(timedf[,c(2,3)], 1, function(x) getp1TOp2(x[1], x[2], 19, 24))
> timedf
spaceNum starttime endtime staytime P0To7 P7To19 P19To24
1 1 2015-09-03 00:00 2015-09-04 20:05 44.08 14.00 24.00 6.08
2 1 2015-09-04 23:18 2015-09-05 05:52 6.57 5.87 0.00 0.70
3 1 2015-09-05 05:59 2015-09-05 06:15 0.27 0.27 0.00 0.00
4 2 2015-09-03 06:19 2015-09-05 16:36 58.28 14.68 33.60 10.00
5 2 2015-09-06 09:03 2015-09-06 09:06 0.05 0.00 0.05 0.00
6 2 2015-09-06 09:10 2015-09-06 20:42 11.53 0.00 9.83 1.70