Question

我的问题是：如何根据时间上的连续性对测量值进行分组。下面是一个示例。

# Build three runs of half-hourly timestamps separated by multi-hour gaps.
# Descriptive names are used so that no variable shadows base::c().
morning <- seq(as.POSIXct("2014-07-20 10:00:00"),
               as.POSIXct("2014-07-20 12:00:00"), by = "30 min")
evening <- seq(as.POSIXct("2014-07-20 20:00:00"),
               as.POSIXct("2014-07-20 22:00:00"), by = "30 min")
next_morning <- seq(as.POSIXct("2014-07-21 08:30:00"),
                    as.POSIXct("2014-07-21 10:30:00"), by = "30 min")
# conc is drawn from runif(), so its values change on every run.
df <- data.frame(date = c(morning, evening, next_morning), conc = runif(15))

期望的输出如下所示：

 date       conc group
2014-07-20 10:00:00 0.30899449     x
2014-07-20 10:30:00 0.25436235     x
2014-07-20 11:00:00 0.01122904     x
2014-07-20 11:30:00 0.38944058     x
2014-07-20 12:00:00 0.26457760     x
2014-07-20 20:00:00 0.50039528     y
2014-07-20 20:30:00 0.72761115     y
2014-07-20 21:00:00 0.06544978     y
2014-07-20 21:30:00 0.01836020     y
2014-07-20 22:00:00 0.26401722     y
2014-07-21 08:30:00 0.51394754     z
2014-07-21 09:00:00 0.23298657     z
2014-07-21 09:30:00 0.27799685     z
2014-07-21 10:00:00 0.34541882     z
2014-07-21 10:30:00 0.08069711     z

Answer 1

可以尝试：

# Gap between consecutive readings, pinned to minutes. diff() on POSIXct
# returns a difftime whose units are chosen automatically, so comparing it
# directly with 30 is only correct when R happens to pick minutes.
gap_mins <- as.numeric(diff(df$date), units = "mins")
# A new run starts at the first row and wherever the gap is not 30 minutes;
# cumsum() turns those run starts into run indices 1, 2, 3, ...
# Only three labels are supplied, so a fourth run would be labelled NA.
df$group <- c("x", "y", "z")[cumsum(c(TRUE, gap_mins != 30))]
df$group
#[1] "x" "x" "x" "x" "x" "y" "y" "y" "y" "y" "z" "z" "z" "z" "z"

或

 # Same grouping, stored as a factor. Subtracting POSIXct vectors yields a
 # difftime with automatically chosen units, so convert the gaps to minutes
 # explicitly before comparing with the 30-minute sampling interval.
 gap_mins <- as.numeric(df$date[-1] - df$date[-nrow(df)], units = "mins")
 # letters[24:26] is "x", "y", "z", i.e. labels for exactly three runs.
 df$group <- factor(cumsum(c(TRUE, gap_mins > 30)), labels = letters[24:26])

Answer 2

如果想使用 plyr 和 dplyr，下面是一种做法。

library(plyr)
library(dplyr)

# Number the runs from 0 (the first row never opens a new run), then map the
# run numbers to the labels x/y/z with revalue().
# diff() on POSIXct chooses its units automatically, so the gaps are converted
# to minutes explicitly before comparing with the 30-minute interval.
# FALSE is spelled out because F is an ordinary, reassignable variable.
df %>%
    mutate(
        group = cumsum(c(FALSE, as.numeric(diff(date), units = "mins") > 30)),
        group = as.character(group),
        group = revalue(group, c("0" = "x", "1" = "y", "2" = "z"))
    )

#                  date       conc group
#1  2014-07-20 10:00:00 0.74729551     x
#2  2014-07-20 10:30:00 0.93302377     x
#3  2014-07-20 11:00:00 0.63812335     x
#4  2014-07-20 11:30:00 0.07552563     x
#5  2014-07-20 12:00:00 0.92733773     x
#6  2014-07-20 20:00:00 0.01533538     y
#7  2014-07-20 20:30:00 0.47554703     y
#8  2014-07-20 21:00:00 0.65365968     y
#9  2014-07-20 21:30:00 0.87205291     y
#10 2014-07-20 22:00:00 0.34648272     y
#11 2014-07-21 08:30:00 0.03557178     z
#12 2014-07-21 09:00:00 0.37837235     z
#13 2014-07-21 09:30:00 0.29888315     z
#14 2014-07-21 10:00:00 0.90837697     z
#15 2014-07-21 10:30:00 0.61514152     z

基于测量日期时间连续性的分组

2 个答案: