我使用rms软件包开发了一个癌症总体生存率的cox模型。我使用765个观测值(194个实习生,571个实习生)。所以我分为两个数据集:训练和验证
我得到了正确的考克斯模型。当我尝试验证时,出现此错误:
Variable length differ (found for 'N')
In addition: Warning message:'newdata' had 571 rows but variables found have 194 rows.
有人可以帮助我吗? :)
这是我的代码:
#load packages
library(survival)
library(rms)
library(dplyr)
#Define survival variables Zeit=time, Status=status
Zeit <- mydata$UEBERLEBEN
Status <- mydata$TOD
Alter <- mydata$KRANK_ALT
KOHORTE <- mydata$KOHORTE
#Define predcictor Variables
set.seed(88)
units(Zeit)= "Year"
label(Alter)= "Age at diagnosis"
T <- factor(mydata$HIGHEST_T,levels = 1:4, labels= c('T1', 'T2', 'T3', 'T4'))
N <- factor(mydata$N_STAGE_EINFACH,levels = 0:1, labels= c('N0', '>=N1'))
M <- factor(mydata$HIGHEST_M,levels = 0:1, labels=c('M0', 'M1'))
Rezidiv <-factor(mydata$REZIDIV,levels = 0:1, labels=c('NO', 'YES'))
Malig <- factor(mydata$MALIG,levels = 1:2, labels=c('Low GRADE', 'HIGH GRADE'))
Histo <- factor(mydata$HIST_WICHTIG,levels = 8:13, labels = c('Adeno-Ca', 'Acinic-Ca', 'ACC','MEC', 'PEC', 'other'))
Geschlecht <- factor(mydata$SEX, levels = 1:2, labels= c('female','male'))
#Define Survival object
S <- Surv(Zeit, Status==1)
#Define Datadist
dd<- datadist( Geschlecht,T,N,M, Alter, Rezidiv, Malig, Histo)
options(datadist='dd')
#build cox model (KOHORTE==0 means Training data... Training is coded by 0)
Cox <- cph(S ~ T + N +M + Alter + Histo , surv = TRUE, subset = KOHORTE==0, time.inc = 5, x=TRUE, y=TRUE)
print(Cox)
#Validation
#Build Validation data set ( Validation group is coded by 1)
Validation <- subset(mydata, KOHORTE==1)
#Define new Survival object for validation data set
ZeitV <- Validation$UEBERLEBEN
StatusV <- Validation$TOD
V <- Surv(ZeitV, StatusV)
Valid <-val.surv(Cox, newdata=Validation, S=V )
plot(Valid)