Frank Harrell使用rms val.surv使用外部数据集验证Cox模型的错误

时间:2019-02-13 22:38:11

标签: r validation cox

我使用rms软件包开发了一个癌症总体生存率的cox模型。我使用765个观测值(194个实习生,571个实习生)。所以我分为两个数据集:训练和验证

我得到了正确的考克斯模型。当我尝试验证时,出现此错误:

Variable length differ (found for 'N')
In addition: Warning message:'newdata' had 571 rows but variables found have 194 rows.

有人可以帮助我吗? :)

这是我的代码:

  #load packages 
library(survival)
library(rms)
library(dplyr)


#Define survival variables Zeit=time, Status=status

Zeit <- mydata$UEBERLEBEN
Status <- mydata$TOD



Alter <- mydata$KRANK_ALT
KOHORTE <- mydata$KOHORTE


#Define predcictor Variables

set.seed(88)
units(Zeit)= "Year"
label(Alter)= "Age at diagnosis"



T <- factor(mydata$HIGHEST_T,levels = 1:4, labels= c('T1', 'T2', 'T3', 'T4'))
N <- factor(mydata$N_STAGE_EINFACH,levels = 0:1, labels= c('N0', '>=N1'))
M <- factor(mydata$HIGHEST_M,levels =  0:1, labels=c('M0', 'M1'))
Rezidiv <-factor(mydata$REZIDIV,levels =  0:1, labels=c('NO', 'YES'))
Malig <- factor(mydata$MALIG,levels =  1:2, labels=c('Low GRADE', 'HIGH GRADE'))
Histo <- factor(mydata$HIST_WICHTIG,levels =  8:13, labels = c('Adeno-Ca', 'Acinic-Ca', 'ACC','MEC', 'PEC', 'other'))
Geschlecht <- factor(mydata$SEX, levels = 1:2, labels= c('female','male'))

#Define Survival object
S <- Surv(Zeit, Status==1)


#Define Datadist

dd<- datadist( Geschlecht,T,N,M, Alter, Rezidiv, Malig, Histo)
options(datadist='dd')


#build cox model (KOHORTE==0 means Training data... Training is coded by 0)

Cox <- cph(S ~ T + N +M + Alter + Histo , surv = TRUE, subset = KOHORTE==0,  time.inc = 5,  x=TRUE, y=TRUE)

print(Cox)

#Validation
#Build Validation data set ( Validation group is coded by 1)

Validation <- subset(mydata, KOHORTE==1)

#Define new Survival object for validation data set
ZeitV <- Validation$UEBERLEBEN
StatusV <- Validation$TOD
V <- Surv(ZeitV, StatusV)
Valid <-val.surv(Cox, newdata=Validation, S=V )
plot(Valid)

0 个答案:

没有答案