我有一些时间数据
# Example data: two groups of 10 observations each.  For every group, 10
# timestamps are drawn uniformly within 60..1e4 seconds (~2.8 hours) after a
# group-specific start time, together with a random `val`.  The idiom
# `[order(time), id := seq_len(.N)]` assigns `id` by reference to the rows in
# chronological order, so `id` is the time rank within each group.
library(data.table); library(lubridate); set.seed(42)
dat <- rbind(data.table(time=as.POSIXct("2019-01-01 08:00:00") + round(runif(10,60,1e4)), val=runif(10),group=1)[order(time), id:=seq_len(.N)],
data.table(time=as.POSIXct("2019-02-01 18:00:00") + round(runif(10,60,1e4)), val=runif(10),group=2)[order(time), id:=seq_len(.N)])
> dat[order(group,id)]
time val group id
1: 2019-01-01 08:23:19 0.117487362 1 1
2: 2019-01-01 08:48:24 0.934672247 1 2
3: 2019-01-01 09:27:00 0.940014523 1 3
4: 2019-01-01 09:47:19 0.462292823 1 4
5: 2019-01-01 09:49:51 0.474997082 1 5
6: 2019-01-01 09:57:48 0.560332746 1 6
7: 2019-01-01 10:03:02 0.978226428 1 7
8: 2019-01-01 10:18:35 0.255428824 1 8
9: 2019-01-01 10:32:33 0.457741776 1 9
10: 2019-01-01 10:36:15 0.719112252 1 10
11: 2019-02-01 18:14:39 0.003948339 2 1
12: 2019-02-01 18:23:59 0.811055141 2 2
13: 2019-02-01 19:05:39 0.007334147 2 3
14: 2019-02-01 19:15:03 0.906601408 2 4
15: 2019-02-01 19:26:11 0.832916080 2 5
16: 2019-02-01 20:19:30 0.611778643 2 6
17: 2019-02-01 20:30:46 0.737595618 2 7
18: 2019-02-01 20:31:03 0.207658973 2 8
19: 2019-02-01 20:37:50 0.685169729 2 9
20: 2019-02-01 20:44:50 0.388108283 2 10
,我想为每个 time 的值计算接下来一小时内 val 的总和。例如,对于 ID 1,这将是 ID 1 和 ID 2 的 val 之和(因为 ID 3 的时间在 ID 1 之后超过一小时);对于 ID 2,则是 ID 2 到 ID 4 的 val 之和,依此类推。这样会产生所需的输出(仅针对组 1):
对于序列末尾的行,可能出现两种行为:
> res
time val id new1 new2
1: 2019-01-01 08:23:19 0.1174874 1 1.052160 1.052160
2: 2019-01-01 08:48:24 0.9346722 2 2.336979 2.336979
3: 2019-01-01 09:27:00 0.9400145 3 3.671292 3.671292
4: 2019-01-01 09:47:19 0.4622928 4 3.908132 3.908132
5: 2019-01-01 09:49:51 0.4749971 5 3.445839 NA
6: 2019-01-01 09:57:48 0.5603327 6 2.970842 NA
7: 2019-01-01 10:03:02 0.9782264 7 2.410509 NA
8: 2019-01-01 10:18:35 0.2554288 8 1.432283 NA
9: 2019-01-01 10:32:33 0.4577418 9 1.176854 NA
10: 2019-01-01 10:36:15 0.7191123 10 0.719112 NA
new1:无论剩余多少数据都直接求和;new2:只有在当前行之后的一小时内仍有数据(存在对应的 ID)时才计算总和,其余值都设置为 NA(首选)。我怀疑要解决这个问题,我需要按 time 进行子集化,但这是我经常遇到且无法解决的一类问题,我尚未掌握其一般处理方法。
答案 0(得分:2):
可以通过在循环中使用 join 来实现:
# Rolling one-hour-window sum via a loop of non-equi self-joins.
# NOTE(review): this first version ignores `group`; with the 2-group `dat`,
# ids are duplicated and `match(i, id)` only finds the first occurrence of
# each id -- it is intended for single-group data (f1 below handles groups).
dat1 <- dat[order(id)]
out <- rbindlist(lapply(dat1$id, function(i) {
# Keep the current row and everything after it (match() = first position of i).
d1 <- dat1[seq_len(.N) >= match(i, id)]
# Non-equi join against the shifted times (`time %m+% hours(1)` = each time
# plus one hour): for each upper bound, sum `val` over rows with time <= bound
# (by = .EACHI aggregates per i-row); [1] keeps only the result anchored at
# the current id, carrying that row's timestamp along as `time1`.
d1[d1[, .(time = time %m+% hours(1))], .(time1 = time, val, new1 = sum(val)),
on = .(time <= time), by = .EACHI][1]
}))[, time := NULL][]  # drop the join column (the +1h upper bound)
setnames(out, 1, "time")  # rename `time1` back to "time"
# new2: keep the sum only where a full hour of data follows the row.
# NOTE(review): the cutoff `time[2] %m+% hours(1)` hard-codes the second
# timestamp and looks tailored to this example data -- confirm the intended
# general rule before reusing.
out[time < time[2] %m+% hours(1), new2 := new1]
out
# time val new1 new2
# 1: 2019-01-01 08:23:19 0.1174874 1.0521596 1.052160
# 2: 2019-01-01 08:48:24 0.9346722 2.3369796 2.336980
# 3: 2019-01-01 09:27:00 0.9400145 3.6712924 3.671292
# 4: 2019-01-01 09:47:19 0.4622928 3.9081319 3.908132
# 5: 2019-01-01 09:49:51 0.4749971 3.4458391 NA
# 6: 2019-01-01 09:57:48 0.5603327 2.9708420 NA
# 7: 2019-01-01 10:03:02 0.9782264 2.4105093 NA
# 8: 2019-01-01 10:18:35 0.2554288 1.4322829 NA
# 9: 2019-01-01 10:32:33 0.4577418 1.1768540 NA
#10: 2019-01-01 10:36:15 0.7191123 0.7191123 NA
对于包含多个组的新数据,我们可以按组 split,然后对每组应用相同的方法:
# Grouped version: split the data by `group` and apply the same join-loop to
# each piece, then stack the per-group results with rbindlist().
f1 <- function(data) {
lst1 <- split(data, data[["group"]])
rbindlist(lapply(lst1, function(.dat) {
# Same rolling one-hour sum as the ungrouped version, one group at a time.
out <- rbindlist(lapply(.dat$id, function(i) {
# Rows from the current id onward (ids are unique within a group).
d1 <- .dat[seq_len(.N) >= match(i, id)]
# Non-equi self-join: per anchor row, sum `val` over rows whose time lies
# within one hour of it; [1] keeps the result anchored at the current id.
d1[d1[, .(time = time %m+% hours(1))], .(time1 = time, val, new1 = sum(val)),
on = .(time <= time), by = .EACHI][1]
}))[, time := NULL][]
setnames(out, 1, "time")
# new2 only where at least one full hour of data remains after the row.
# NOTE(review): this compares a difftime (`time[.N]-time`) with a lubridate
# Period (`hours(1)`); the outcome depends on lubridate's coercion rules and
# the condition differs from the `time[2]`-based cutoff of the ungrouped
# version above -- confirm the intended cutoff semantics.
out[time[.N]-time > hours(1), new2 := new1][]
})
)}
f1(dat1)
# time val new1 new2
#1: 2019-01-01 08:23:19 0.117487362 1.0521596 1.0521596
#2: 2019-01-01 08:48:24 0.934672247 2.3369796 2.3369796
#3: 2019-01-01 09:27:00 0.940014523 3.6712924 3.6712924
#4: 2019-01-01 09:47:19 0.462292823 3.9081319 3.9081319
#5: 2019-01-01 09:49:51 0.474997082 3.4458391 NA
#6: 2019-01-01 09:57:48 0.560332746 2.9708420 NA
#7: 2019-01-01 10:03:02 0.978226428 2.4105093 NA
#8: 2019-01-01 10:18:35 0.255428824 1.4322829 NA
#9: 2019-01-01 10:32:33 0.457741776 1.1768540 NA
#10: 2019-01-01 10:36:15 0.719112252 0.7191123 NA
#11: 2019-02-01 18:14:39 0.003948339 0.8223376 0.8223376
#12: 2019-02-01 18:23:59 0.811055141 1.7249907 1.7249907
#13: 2019-02-01 19:05:39 0.007334147 1.7468516 1.7468516
#14: 2019-02-01 19:15:03 0.906601408 1.7395175 1.7395175
#15: 2019-02-01 19:26:11 0.832916080 1.4446947 NA
#16: 2019-02-01 20:19:30 0.611778643 2.6303112 NA
#17: 2019-02-01 20:30:46 0.737595618 2.0185326 NA
#18: 2019-02-01 20:31:03 0.207658973 1.2809370 NA
#19: 2019-02-01 20:37:50 0.685169729 1.0732780 NA
#20: 2019-02-01 20:44:50 0.388108283 0.3881083 NA