在问题“Defining variable by logical subseting on time interval in data.table”中,我请求帮助根据事件之间的时间区间(即从 event==1 到 event==2 或 event==3)为变量 state 赋值。
# Defining variables and data.table
library(data.table)

# Three ids ("A", "B", "C") with five observations each.
id <- rep(LETTERS[1:3], each = 5)

# For every id: one start event (1) somewhere in the first two rows and one
# end placeholder (2) somewhere in the last three rows; all other rows are 0.
event <- unlist(replicate(
  3,
  c(
    sample(c(0, 1), size = 2, replace = FALSE),
    sample(c(0, 0, 2), size = 3, replace = FALSE)
  ),
  simplify = FALSE
))

# Turn every end placeholder into end event 2 or 3, chosen at random.
event[event == 2] <- sample(c(2, 3), size = sum(event == 2), replace = TRUE)

# `state` starts out as the character string "NULL" on every row.
state <- "NULL"

# Strictly increasing times within each id: cumulative sums of uniform draws,
# computed column by column and then flattened in id order.
time <- c(apply(matrix(runif(3 * 5), nrow = 5, ncol = 3), 2, cumsum))

DT <- data.table(id, event, state, time)

# Copy row 13 into row 14 so that two rows of id "C" share the same time, then
# force end events 2 and 3 onto that shared time (and no later end event) so
# that both end codes appear at the same endpoint.
DT[14, ] <- DT[13, ]
DT[13:15, event := c(2, 3, 0)]
id event state time
1: A 0 NULL 0.3279207
2: A 1 NULL 1.2824244
3: A 0 NULL 2.1719637
4: A 3 NULL 2.8647671 <- Event 2 or 3 marks the end point
5: A 0 NULL 3.5052739
6: B 0 NULL 0.9942698
7: B 1 NULL 1.6499756
8: B 2 NULL 2.3585060 <- Event 2 or 3 marks the end point
9: B 0 NULL 2.9025721
10: B 0 NULL 3.4967141
11: C 1 NULL 0.2891597
12: C 0 NULL 0.4362734
13: C 2 NULL 1.3992976 <- Here both 2 and 3 appear at the same endpoint
14: C 3 NULL 1.3992976 <- Here both 2 and 3 appear at the same endpoint
15: C 0 NULL 2.9923019
id event state time
1: A 0 NULL 0.3279207
2: A 1 1 1.2824244
3: A 0 1 2.1719637
4: A 3 1 2.8647671
5: A 0 NULL 3.5052739
6: B 0 NULL 0.9942698
7: B 1 1 1.6499756
8: B 2 1 2.3585060
9: B 0 NULL 2.9025721
10: B 0 NULL 3.4967141
11: C 1 1 0.2891597
12: C 0 1 0.4362734
13: C 2 1 1.3992976
14: C 3 1 1.3992976
15: C 0 NULL 2.9923019
# First attempt: within each id, flag the rows whose time lies between the
# start event (1) and an end event (2 or 3).
# This raises the error printed right after it: in a group that has no
# event == 2 (id "A" in the printed data only has event 3), time[event==2] is
# zero-length, the whole condition collapses to a zero-length logical, and
# ifelse() then returns a logical vector that cannot be assigned to the
# character column `state`.
DT[,state:=ifelse(time>=time[event==1] & (time<=time[event==2] | time<=time[event==3]),1,state),by=id]
Error in `[.data.table`(DT, , `:=`(state, ifelse(time >= time[event == :
Type of RHS ('logical') must match LHS ('character'). To check and coerce would
impact performance too much for the fastest cases. Either change the type of the target
column, or coerce the RHS of := yourself (e.g. by using 1L instead of 1)
# Second attempt: look up the end time with a single subset that matches
# either end event (2 or 3).
# NOTE(review): when both end events occur in the same id (rows 13 and 14 of
# id "C" in the printed data), time[event==2 | event==3] has length 2 and is
# recycled against the group's time vector in the comparison.
DT[,state:=ifelse(time>=time[event==1] & time<=time[event==2 | event==3],1,state),by=id]
但是,当逻辑语句 time<=time[event==2 | event==3] 在同一个 id 中匹配到多个终点(例如 id C 中事件 2 和 3 同时出现)时,该子集会返回多个值,并在比较时被循环补齐,因此这种写法并不可靠。
答案 0 :(得分:3)
# Answer 0: compare within-group row positions instead of time values.
# `rows` numbers the rows of each id and is left in DT as a helper column.
# A row is flagged when its position lies between the start event (1) and the
# last end event (2 or 3) of its id.
# The flag is the string "1" so that ifelse() yields character for every
# group, matching the type of the `state` column.
DT[, rows := seq_len(.N), by = id][
  , state := ifelse(
    rows >= which(event == 1) & rows <= max(which(event %in% 2:3)),
    "1",
    state
  ),
  by = id
]
id event state time rows
1: A 0 NULL 0.3279207 1
2: A 1 1 1.2824244 2
3: A 0 1 2.1719637 3
4: A 3 1 2.8647671 4
5: A 0 NULL 3.5052739 5
6: B 0 NULL 0.9942698 1
7: B 1 1 1.6499756 2
8: B 2 1 2.3585060 3
9: B 0 NULL 2.9025721 4
10: B 0 NULL 3.4967141 5
11: C 1 1 0.2891597 1
12: C 0 1 0.4362734 2
13: C 2 1 1.3992976 3
14: C 3 1 1.3992976 4
15: C 0 NULL 2.9923019 5
答案 1 :(得分:3)
# Look at id "A" only: it contains no event == 2, so the subset of `time`
# selected here is empty.
DT[id == "A", time[event == 2]]
## numeric(0)
解决此问题的最简单方法是取两个终点时间中的最大值,例如:time <= max(time[event %in% 2:3])
# Answer 1: within each id, flag the rows from the start event (1) up to the
# latest end time among events 2 and 3. `%in% 2:3` matches whichever end
# events exist, so a missing event 2 or 3 no longer yields an empty subset.
# The flag is the string "1" so that ifelse() yields character for every
# group, matching the type of the `state` column.
DT[, state := ifelse(
  time >= time[event == 1] & time <= max(time[event %in% 2:3]),
  "1",
  state
), by = id]
## id event state time
## 1: A 0 NULL 0.3279207
## 2: A 1 1 1.2824244
## 3: A 0 1 2.1719637
## 4: A 3 1 2.8647671
## 5: A 0 NULL 3.5052739
## 6: B 0 NULL 0.9942698
## 7: B 1 1 1.6499756
## 8: B 2 1 2.3585060
## 9: B 0 NULL 2.9025721
## 10: B 0 NULL 3.4967141
## 11: C 1 1 0.2891597
## 12: C 0 1 0.4362734
## 13: C 2 1 1.3992976
## 14: C 3 1 1.3992976
## 15: C 0 NULL 2.9923019
答案 2 :(得分:1)
# Answer 2: split every id into segments that begin at a start event (1) and
# flag the rows of each segment up to its end event (2 or 3).

# Number the segments per id; segment 0 holds the rows before the first start.
DT[, segment := cumsum(event == 1), by = id]

# Inside a segment keep every row up to and including the earliest end time.
# Comparing on time (rather than on a lagged event counter) also keeps a
# second end event recorded at the same time as the first one.
# Assumes rows are ordered by time within each id, as in the example data.
# A segment without any end event is kept in full.
DT[, keep := {
  end_times <- time[event %in% c(2, 3)]
  if (length(end_times) > 0) {
    time <= min(end_times)
  } else {
    rep(TRUE, .N)
  }
}, by = .(id, segment)]

# Rows before the first start event never belong to an interval.
DT[segment == 0, keep := FALSE]

# `state` is a character column, so the flag is written as a string.
DT[(keep), state := "1"]

# Drop the helper columns again.
DT[, c("segment", "keep") := NULL]