我有兴趣在我的Cox回归模型中将治疗作为时变协变量包括在内。要做到这一点,我只是'必须在治疗间隔中分割每个患者的发作。我有问题以干净的间隔安排我的治疗数据。每位患者可以进行多次治疗(A / B / ..),这可能部分重叠。我想将重叠治疗视为一个单独的类别(A / B / AB)。
我的治疗数据如下:
df <- structure(list(Pt = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L),
start = c(30L, 35L, 60L, 80L, 120L, 5L, 15L, 45L, 65L, 80L
), stop = c(45L, 100L, 90L, 95L, 150L, 20L, 35L, 55L, 100L,
90L), tx = structure(c(1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L,
2L), .Label = c("A", "B"), class = "factor")), .Names = c("Pt",
"start", "stop", "tx"), class = "data.frame", row.names = c(NA,
-10L))
> df
Pt start stop tx
1 1 30 45 A
2 1 35 100 B
3 1 60 90 A
4 1 80 95 A
5 1 120 150 B
6 2 5 20 A
7 2 15 35 B
8 2 45 55 B
9 2 65 100 A
10 2 80 90 B
我想得到的是:
> result
Pt start stop tx
1 1 0 30 None
2 1 30 35 A
3 1 35 45 AB
4 1 45 60 B
5 1 60 95 AB
6 1 95 100 B
7 1 100 120 None
8 1 120 150 B
9 2 0 5 None
10 2 5 15 A
11 2 15 20 AB
12 2 20 35 B
13 2 35 45 None
14 2 45 55 B
15 2 55 65 None
16 2 65 80 A
17 2 80 90 AB
18 2 90 100 A
请注意:
编辑: 我使用for循环创建了一个丑陋的解决方案。我不确定这是否会在我的大型数据集上运行,但至少它可以很好地完成这个示例。加快欢迎的想法!
# Create empty data.frame for results
results <- data.frame(Pt=c(), start=c(), stop=c(), tx=c())
for(i in 1:length(unique(df$Pt))){ # Loop over patients
pt <- subset(df, Pt == unique(df$Pt)[i])
out <- data.frame(day = c(1:max(pt$stop))) # Create df (1 row for each day; pt specific) which will be filled with treatment info below
# Loop over treatments (allows for patient's treatments to differ)
for(t in 1:length(unique(pt$tx))){
name <- as.character(unique(pt$tx)[t])
out[[name]] <- NA
out[[name]][unique(eval(parse(text=paste0("c(",paste(pt$start[pt$tx == unique(pt$tx)[t]], pt$stop[pt$tx == unique(pt$tx)[t]], sep=":", collapse=","), ")"))))] <- name
}
out[is.na(out)==T] <- "" # Remove NA
out$tx <- do.call(paste, c(out[-1], sep="")) # Concatenate treatment columns
out$tx[out$tx ==""] <- "None"
out # Check
# Now compact the tx as intervals
starting.points = c(0,1+which(diff(as.numeric(as.factor(out$tx)))!=0))
stopping.points = c(starting.points[-1], dim(out)[1])
txs = out$tx[c(1, starting.points[-1])]
# And write to the general 'results' dataframe
results <- rbind(results, data.frame(Pt=rep(unique(df$Pt)[i], length(txs)), start=starting.points, stop=stopping.points, tx=txs))
}
results
> results
Pt start stop tx
1 1 0 30 None
2 1 30 35 A
3 1 35 46 AB
4 1 46 60 B
5 1 60 96 AB
6 1 96 101 B
7 1 101 120 None
8 1 120 150 B
9 2 0 5 None
10 2 5 15 A
11 2 15 21 AB
12 2 21 36 B
13 2 36 45 None
14 2 45 56 B
15 2 56 65 None
16 2 65 80 A
17 2 80 91 AB
18 2 91 100 A