我的目标是检查每个受试者的不同药物在用药时间上是否存在重叠。但在逐行比较时,我漏掉了一些本应判定为重叠的行。
# Example data: one row per drug record, holding the subject ID, the drug
# name and the Start/End values of the record.
df <- data.frame(
  ID = c(1, 1, 1, 1, 2, 2, 2, 2, 2),
  Drug = c("A", "A", "A", "B", "A", "B", "A", "A", "A"),
  Start = c(0, 529, 12.86, 35.57, 0.29, 17.57, 18, 20, 14),
  End = c(5, 12, 35.57, 35.57, 2, 20.57, 18.14, 20.57, 24)
)

# Order by subject and then by Start so that consecutive rows are neighbours.
df1 <- df[order(df$ID, df$Start), ]

# Pair every row (from the second one onwards) with the row directly above it.
cur <- seq_len(nrow(df1))[-1]
prev <- cur - 1

# A pair is flagged when both rows belong to the same subject, the lower row
# starts no later than the upper row ends, and the two drugs differ.
# NOTE: only adjacent rows are compared, so an overlap with a row further up
# (e.g. a different drug two rows earlier that is still running) is missed.
hit <- df1$ID[cur] == df1$ID[prev] &
  df1$Start[cur] <= df1$End[prev] &
  df1$Drug[cur] != df1$Drug[prev]

# Both members of a flagged pair are marked.
df1$filter <- FALSE
df1$filter[c(prev[hit], cur[hit])] <- TRUE

df1
ID Drug Start End filter
1 1 A 0.00 5.00 FALSE
3 1 A 12.86 35.57 TRUE
4 1 B 35.57 35.57 TRUE
2 1 A 529.00 12.00 FALSE
5 2 A 0.29 2.00 FALSE
9 2 A 14.00 24.00 TRUE
6 2 B 17.57 20.57 TRUE
7 2 A 18.00 18.14 TRUE
8 2 A 20.00 20.57 FALSE <- ** this should be TRUE: it overlaps with Drug B (row 6)
有没有办法修复这个循环,或者改用其他方法?一个人可能同时服用多种药物。
更新数据
# Updated example data. Each vector below becomes one column of `df`:
# subject ID, drug name, Begin/End of the record, and a running Dose index.
ID <- c(rep(1, 4), rep(3, 5), rep(4, 4), rep(5, 5))
Drug <- c(
  "A", "A", "B", "A",
  "A", "C", "D", "A", "C",
  "A", "A", "B", "B",
  "A", "A", "B", "B", "C"
)
Begin <- c(0, 2.5, 3, 4, 7, 8, 7, 25, 25, 10, 15, 1, 2, 1, 6, 10, 11, 13)
End <- c(1.5, 3.5, 3, 6, 12, 8, 11, 29, 35, 12, 19, 1, 28, 5, 20, 30, 20, 25)
Dose <- 1:18
df <- data.frame(ID, Drug, Begin, End, Dose)

# Order by subject and then by End; ties keep their original relative order.
df1 <- df[order(df$ID, df$End), ]
我已经得到了 filter 变量,但在获取重叠的药物配对时遇到了问题。下面是我期望得到的 pair 变量的输出:
# `%>%`, group_by() and mutate() come from dplyr, so it must be attached
# before the pipeline below can run.
library(dplyr)

# Running maximum of End within each subject.
# NOTE(review): assumes df1 is already ordered by ID and then End, as in the
# preceding setup snippet.
df1 <- df1 %>%
  group_by(ID) %>%
  mutate(max = cummax(End))

# Flag both rows of every adjacent pair that belongs to the same subject,
# involves two different drugs, and whose lower row begins no later than the
# running maximum End of the row above it.
df1$filter <- FALSE
for (k in seq_len(nrow(df1))[-1]) {
  same_subject <- df1$ID[k] == df1$ID[k - 1]
  begins_in_time <- df1$Begin[k] <= df1$max[k - 1]
  other_drug <- df1$Drug[k] != df1$Drug[k - 1]
  if (same_subject && begins_in_time && other_drug) {
    df1$filter[k - 1] <- TRUE
    df1$filter[k] <- TRUE
  }
}

# Keep only the flagged rows.
df2 <- subset(df1, df1$filter == TRUE)
ID Drug Begin End Dose max filter pair
<dbl> <fctr> <dbl> <dbl> <int> <dbl> <lgl>
1 1 B 3.0 3.0 3 3.0 TRUE (B,A)
2 1 A 2.5 3.5 2 3.5 TRUE (B,A)
3 3 C 8.0 8.0 6 8.0 TRUE (C,D,A)
4 3 D 7.0 11.0 7 11.0 TRUE (C,D,A)
5 3 A 7.0 12.0 5 12.0 TRUE (C,D,A)
6 3 A 25.0 29.0 8 29.0 TRUE (A,C)
7 3 C 25.0 35.0 9 35.0 TRUE (A,C)
8 4 A 15.0 19.0 11 19.0 TRUE (A,B)
9 4 B 2.0 28.0 13 28.0 TRUE (A,B)
10 5 A 6.0 20.0 15 20.0 TRUE (A,B,C)
11 5 B 11.0 20.0 17 20.0 TRUE (A,B,C)
12 5 C 13.0 25.0 18 25.0 TRUE (A,B,C)
13 5 B 10.0 30.0 16 30.0 TRUE (A,B,C)