我的数据框dat
包含标题Date
Hour
和TYPE
,如下所示:
dat <- structure(list(Date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = "04/20/17", class = "factor"),
Hour = c("14:30:00", "14:31:00", "14:32:00", "14:33:00",
"14:34:00", "14:35:00", "14:36:00", "14:37:00", "14:38:00",
"14:39:00", "14:40:00", "14:41:00", "14:42:00", "14:43:00",
"14:44:00", "14:45:00", "14:46:00", "14:47:00", "14:48:00",
"14:49:00", "14:50:00", "14:51:00", "14:52:00", "14:53:00",
"14:54:00", "14:55:00", "14:56:00", "14:57:00", "14:58:00",
"14:59:00", "15:00:00", "15:01:00", "15:02:00", "15:03:00",
"15:04:00", "15:05:00", "15:06:00", "15:07:00", "15:08:00",
"15:09:00", "15:10:00", "15:11:00", "15:12:00", "15:13:00",
"15:14:00", "15:15:00", "15:16:00", "15:17:00", "15:18:00",
"15:19:00", "15:20:00", "15:21:00", "15:22:00", "15:23:00",
"15:24:00", "15:25:00", "15:26:00", "15:27:00", "15:28:00",
"15:29:00", "15:30:00", "15:31:00", "15:32:00", "15:33:00",
"15:34:00", "15:35:00", "15:36:00", "15:37:00", "15:38:00",
"15:39:00", "15:40:00", "15:41:00", "15:42:00", "15:43:00",
"15:44:00", "15:45:00", "15:46:00", "15:47:00", "15:48:00",
"15:49:00", "15:50:00", "15:51:00", "15:52:00", "15:53:00",
"15:54:00", "15:55:00", "15:56:00", "15:57:00", "15:58:00",
"15:59:00", "16:00:00", "16:01:00", "16:02:00", "16:03:00",
"16:04:00", "16:05:00", "16:06:00", "16:07:00", "16:08:00",
"16:09:00", "16:10:00", "16:11:00", "16:12:00", "16:13:00",
"16:14:00", "16:15:00", "16:16:00", "16:17:00", "16:18:00",
"16:19:00", "16:20:00", "16:21:00", "16:22:00", "16:23:00",
"16:24:00", "16:25:00", "16:26:00", "16:27:00", "16:28:00",
"16:29:00", "16:30:00", "16:31:00", "16:32:00", "16:33:00",
"16:34:00"), TYPE = c(3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3)), .Names = c("Date",
"Hour", "TYPE"), row.names = 2:126, class = "data.frame")
在此示例中,数据框Date
仅代表一天,但可能有很多天。 TYPE
是数字,对应于所采取的行为(1-3)。
背景
连续跟随的类似TYPE
数字表示正在进行的相同行为。因此,在14:30:00和14:37:00之间进行了单一行为,我想将其称为单Bout
我的问题: 我怎样才能对数据进行排序,以便最终得到如下数据框:
编辑包括下一天:
Date Bout Start End TYPE
4/20/17 1 14:30:00 14:37:00 3
4/20/17 2 14:38:00 15:27:00 1
4/20/17 3 15:28:00 16:03:00 2
4/20/17 4 16:04:00 16:23:00 1
4/20/17 5 16:24:00 16:34:00 3
4/21/17 1 13:00:00 13:01:00 1
4/21/17 2 13:02:00 13:05:00 2
我想根据Date
对它们进行分组,所以这里的日期将开始4/20/17
,下一个(虽然未在示例中显示)将是4/21/17.
我使用aggregate
收集了类似的数据,例如:
setNames(aggregate(cbind(dat$TYPE == 1, dat$TYPE == 2, dat$TYPE == 3), by=list(Category=dat$Date), FUN = sum), c("Date", "Standing", "Lying_right", "Lying_left"))
但无法确定如何在此上下文中使用它。也许某个函数可以放在FUN
的{{1}}中?
答案 0 :(得分:1)
我们可以使用library(dplyr)
dat %>%
group_by(Date, Bout = cumsum(c(1, diff(TYPE) != 0))) %>%
summarise(start = head(Hour, 1),
End = tail(Hour, 1),
TYPE = unique(TYPE))
#Source: local data frame [5 x 5]
#Groups: Date [?]
# Date Bout start End TYPE
# <fctr> <dbl> <chr> <chr> <dbl>
#1 04/20/17 1 14:30:00 14:37:00 3
#2 04/20/17 2 14:38:00 15:27:00 1
#3 04/20/17 3 15:28:00 16:03:00 2
#4 04/20/17 4 16:04:00 16:23:00 1
#5 04/20/17 5 16:24:00 16:34:00 3
。 interface ImagesAdi {
@GET("android/determinace/json/images.json")
fun getImages(): Single<List<ImagesResponse>>
}
的分组取差值不等于0的值(即不同的值)的cumsum。
interface ImagesAdi {
@GET("android/determinace/json/images.json")
fun getImages(): Single<ImagesResponse>
}
答案 1 :(得分:1)
您可以使用data.table
:
library(data.table)
setDT(dat)
dat[,.(Date=first(Date),Start=first(Hour),End=last(Hour)),by=.(Bout=rleid(TYPE),TYPE)]
# Bout TYPE Date Start End
# 1: 1 3 04/20/17 14:30:00 14:37:00
# 2: 2 1 04/20/17 14:38:00 15:27:00
# 3: 3 2 04/20/17 15:28:00 16:03:00
# 4: 4 1 04/20/17 16:04:00 16:23:00
# 5: 5 3 04/20/17 16:24:00 16:34:00
答案 2 :(得分:1)
另一个data.table
解决方案。将日期和时间作为时间戳处理,以便能够在两天结束的情况下处理案例。
library(data.table)
setDT(dat)
dat[, timestamp := strptime(paste(as.character(Date), Hour), format = "%m/%d/%y %T")]
dat[, bout_id := rep(seq(length(rle(TYPE)$values)), rle(TYPE)$lengths)]
dat[, list(Start = min(timestamp), End = max(timestamp), TYPE = TYPE[1]),by = bout_id]
答案 3 :(得分:1)
以下是使用rle
和reshape
一天工作的基本R方法。
# get information on the length of each bout
temp <- rle(dat$TYPE)
stoppers <- cumsum(temp$lengths)
stoppers <- sort(c(1, stoppers, head(stoppers + 1, -1)))
# subset data according to start and stop point of each bout
datNew <- dat[stoppers, ]
# provide id for each bout
datNew$Bout <- rep(seq_along(temp$lengths), each=2)
# provide start and stop value
datNew$timevar <- 1:2
现在,重塑广泛产生
reshape(datNew, idvar="Bout", direction="wide", v.names="Hour", timevar="timevar")
Date TYPE Bout Hour.1 Hour.2
2 04/20/17 3 1 14:30:00 14:37:00
10 04/20/17 1 2 14:38:00 15:27:00
60 04/20/17 2 3 15:28:00 16:03:00
96 04/20/17 1 4 16:04:00 16:23:00
116 04/20/17 3 5 16:24:00 16:34:00
如果要在不同的日期执行此操作,可以将data.frame拆分为日期的data.frames列表,将上述函数应用于每个data.frame,然后使用do.call
在结果上使用rbind
来组合每个日期。这看起来像
do.call(rbind,
lapply(split(dat, dat$Date), function(x) {
temp <- rle(x$TYPE)
stoppers <- cumsum(temp$lengths)
stoppers <- sort(c(1, stoppers, head(stoppers + 1, -1)))
# subset data according to start and stop point of each bout
datNew <- x[stoppers, ]
# provide id for each bout
datNew$Bout <- rep(seq_along(temp$lengths), each=2)
# provide start and stop value
datNew$timevar <- 1:2
reshape(datNew, idvar="Bout", direction="wide", v.names="Hour", timevar="timevar")
}))