我需要手动计算Cox PH模型的线性预测因子。
对于连续变量和二分类变量,我可以手动算出与 predict.coxph(指定 type = "lp")一致的结果,但始终弄不清楚对于两个以上水平的分类变量应该如何计算。
我的目标是在自己的数据中评估一个已发表模型的校准度——我手头只有系数,因此需要能够手工完成计算。
Stack Overflow 上的这篇帖子介绍了连续变量的计算方法……
(Coxph predictions don't match the coefficients)
任何建议将不胜感激!谢谢
R示例...
# Load the Rossi data (432 rows, per the summary below).
# stringsAsFactors = TRUE is required on R >= 4.0.0, where read.table() keeps
# text columns as character: without it summary() no longer shows level counts
# and any as.numeric() conversion of `fin` yields NA.
# NOTE(review): this URL may no longer resolve; the same data set is believed
# to be available as carData::Rossi -- confirm before relying on it.
URL <- "http://socserv.mcmaster.ca/jfox/Books/Companion/data/Rossi.txt"
Rossi <- read.table(URL, header = TRUE, stringsAsFactors = TRUE)
summary(Rossi[, c("week", "arrest", "fin")])
# week arrest fin
# Min. : 1.00 Min. :0.0000 no :216
# 1st Qu.:50.00 1st Qu.:0.0000 yes:216
# Median :52.00 Median :0.0000
# Mean :45.85 Mean :0.2639
# 3rd Qu.:52.00 3rd Qu.:1.0000
# Max. :52.00 Max. :1.0000
library(survival)
# for binary variable
fitCPH <- coxph(Surv(week, arrest) ~ fin, data = Rossi) # Cox-PH model
(coefCPH <- coef(fitCPH)) # estimated coefficients
# finyes
#-0.3690692
head(predict(fitCPH, type = "lp"))
#[1] 0.1845346 0.1845346 0.1845346 -0.1845346 0.1845346 0.1845346
# Manual linear predictor: (dummy - reference value) * coefficient.
# The 0/1 dummy is built by comparing with the level label, which works
# whether `fin` is a factor or a character column.
fin_yes <- as.numeric(Rossi$fin == "yes")
# Centre at the reference value stored in the fit rather than a hand-computed
# sample mean.
# NOTE(review): the recorded outputs correspond to centring at the sample mean
# (0.5 here). Recent survival releases are believed to leave 0/1 dummy columns
# uncentred (see coxph's `nocenter` argument), in which case predict() returns
# 0 / coef instead; using fitCPH$means should match predict() either way --
# confirm against the installed survival version.
head((fin_yes - fitCPH$means) * coefCPH)
#[1] 0.1845346 0.1845346 0.1845346 -0.1845346 0.1845346 0.1845346
# Categorical covariate with three levels, assigned at random to each row.
set.seed(170981)
Rossi[["categorical.example"]] <- factor(
  sample(seq_len(3), size = nrow(Rossi), replace = TRUE)
)
summary(Rossi[c("week", "arrest", "categorical.example")])
# week arrest categorical.example
# Min. : 1.00 Min. :0.0000 1:156
# 1st Qu.:50.00 1st Qu.:0.0000 2:138
# Median :52.00 Median :0.0000 3:138
# Mean :45.85 Mean :0.2639
# 3rd Qu.:52.00 3rd Qu.:1.0000
# Max. :52.00 Max. :1.0000
# Cox PH model on the three-level factor; level "1" is the reference, so the
# fit has one coefficient each for levels "2" and "3".
fitCPH2 <- coxph(
  Surv(week, arrest) ~ categorical.example,
  data = Rossi
)
coefCPH2 <- coef(fitCPH2)
coefCPH2
#categorical.example2 categorical.example3
# -0.181790 -0.103019
head(predict(fitCPH2, type = "lp"), n = 6L)
#[1] 0.09098066 -0.01203832 -0.01203832 0.09098066 -0.09080938 -0.09080938
# How to calculate manually??
答案 0 :(得分:0)
如果有人感兴趣,我想我已经知道了...
# How to calculate manually??
# coxph() expands the three-level factor into two 0/1 dummy columns (level "1"
# is the reference). The linear predictor is the sum over the dummies of
# (dummy - reference value) * coefficient.
# Build each dummy by comparing with the level label instead of doing
# arithmetic on the factor's integer codes.
dv1 <- as.numeric(Rossi$categorical.example == "2") # 1 for level "2", else 0
dv2 <- as.numeric(Rossi$categorical.example == "3") # 1 for level "3", else 0
# Reference (centring) values are taken from the fit, in coefficient order.
# NOTE(review): on older survival versions these equal mean(dv1) / mean(dv2),
# which is what the recorded output below reflects. Recent releases are
# believed to leave 0/1 dummy columns uncentred (coxph's `nocenter` argument),
# so hand-computed means would no longer match predict() -- confirm against
# the installed survival version.
meandv1 <- fitCPH2$means[[1]]
meandv2 <- fitCPH2$means[[2]]
manual_lp <- (dv1 - meandv1) * coefCPH2[[1]] + (dv2 - meandv2) * coefCPH2[[2]]
head(manual_lp)
#[1] 0.09098066 -0.01203832 -0.01203832 0.09098066 -0.09080938 -0.09080938
# Compare with a numeric tolerance rather than rounding and testing with ==;
# unname() drops any observation names predict() may attach.
isTRUE(all.equal(unname(predict(fitCPH2, type = "lp")), manual_lp))
#[1] TRUE