我正在做生存分析,但不确定自己做得是否正确。我的数据集来自种子发芽实验。主要关注的变量是“处理”(treat,分类变量,共3个水平)。在我的脚本中,我试图通过比较比例风险(PH)模型的系数百分比,来判断各处理之间是否存在差异、哪一种处理最好,以及差异的程度有多大。有人可以帮我解决下面几个问题吗?
1)我需要用as.factor()把变量声明为因子才能使用它们吗?还是整数型变量会被同样地解释?
2)如果违反了比例风险(PH)假设,我该如何处理数据才能建立Cox模型?我查阅了很多资料,但仍然不理解如何添加“协变量×时间”交互项,或如何对模型进行分层。
3)如何在Cox模型中加入脆弱项(frailty term)并检验随机效应(例如种子发芽所在的培养皿,这是一个有4个水平的分类变量,表示重复)?
4)我也无法解释打印内容(summary(cox.fra))。*
*请参见下文
请在下面查看我的两个完整脚本以及注释。
脚本1
# Load the raw seed-germination (survival) dataset.
# The original trailing comment had wrapped onto its own line, leaving a bare
# `survival` symbol that R would try to evaluate; it is now part of a comment.
rd01 <- read.table("sa_kb01.txt", header = TRUE)
rd01
str(rd01)
# Convert the integer-coded grouping columns to factors so that model formulas
# treat them as categories rather than as numeric covariates.
rd01$begin <- as.factor(rd01$begin)
rd01$spp <- as.factor(rd01$spp)
rd01$cit <- as.factor(rd01$cit)
rd01$treat <- as.factor(rd01$treat)
rd01$plate <- as.factor(rd01$plate)
# Inspect the converted data set
str(rd01)
summary(rd01)
names(rd01) # column headers
### Survival analysis
# install.packages("survival")
library(survival)
library(survminer)
# Interactive help lookups for the functions used below
?survfit
?survfit.formula
?survfit.coxph
?ggsurvplot
## Fit Kaplan-Meier survivor function
# One survival curve per treatment level; `status` is the event indicator
# and `day` the observation time.
km.fit <- survfit(
  Surv(day, status) ~ treat,
  data = rd01,
  type = "kaplan-meier"
)
km.fit
print(summary(km.fit))
# Cumulative event curves (fun = "event" plots 1 - S(t)) with confidence bands.
# yscale = 100 relabels the y axis in percent so the ticks agree with the
# "[%]" unit in the axis label (the raw curve is a 0-1 proportion).
plot(
  km.fit,
  conf.int = TRUE,
  fun = "event",
  yscale = 100,
  mark.time = c(140),
  pch = c("S", "W", "A"),
  col = c("darkred", "darkblue", "darkgreen"),
  lty = c("solid", "dotted", "longdash"),
  lwd = 1.5,
  xlab = "time [days]",
  ylab = "germination probability [%]"
)
## Comparison of Survivor Functions
# Log-rank tests
?survdiff
# rho = 0: log-rank (Mantel-Haenszel) test
# rho = 1: Peto & Peto modification of the Gehan-Wilcoxon test
# ... Global test of heterogeneity across all treatment groups
lrmh.123 <- survdiff(Surv(day, status) ~ treat, data = rd01, rho = 0)
# p < 0.05 means at least one group differs from the others; it does NOT
# mean that every group differs from every other group.
print(lrmh.123)
# ... Pairwise comparisons: each call drops one treatment level.
# These p-values are NOT adjusted for multiple testing.
lrmh.120 <- survdiff(
  Surv(day, status) ~ treat,
  data = rd01, subset = treat != 3, rho = 0
)
lrmh.103 <- survdiff(
  Surv(day, status) ~ treat,
  data = rd01, subset = treat != 2, rho = 0
)
lrmh.023 <- survdiff(
  Surv(day, status) ~ treat,
  data = rd01, subset = treat != 1, rho = 0
)
print(lrmh.120)
print(lrmh.103)
print(lrmh.023) # p < 0.05: the two remaining groups differ (unadjusted)
# ... The same pairwise log-rank comparisons with p-values adjusted for
# multiple testing (Benjamini-Hochberg); prefer these for reporting.
lrmh.pairwise <- pairwise_survdiff(
  Surv(day, status) ~ treat,
  data = rd01, p.adjust.method = "BH", rho = 0
)
print(lrmh.pairwise)
## Checking Proportional Hazard (PH) assumption
# mlogmlog(): returns -log(-log(y)) for a numeric input y; used below as the
# transformation of the Kaplan-Meier survival estimates S(t).
mlogmlog <- function(y) {
  neg_log <- -log(y)
  -log(neg_log)
}
# Show the Kaplan-Meier fit whose curves are transformed below
km.fit
# Plot -log(-log(S(t))) against time on a log x axis, one curve per treatment
plot(
  km.fit,
  fun = mlogmlog,
  log = "x",
  mark.time = c(140),
  pch = c("S", "W", "A"),
  col = c("darkred", "darkblue", "darkgreen"),
  lty = c("solid", "dotted", "longdash"),
  lwd = 1.5,
  xlab = "time [days]",
  ylab = "-log(-log(S(t)))"
)
# Reading the plot: curves that do not cross (i.e. stay roughly parallel)
# make the PH assumption plausible.
# Interpretation: http://www.sthda.com/english/wiki/cox-model-assumptions#testing-proportional-hazards-assumption
## Checking for multicollinearity
# install.packages("HH")
library(HH)
# Fit a generalized linear model predicting days from treatment
?glm
mc.glm <- glm(day ~ treat, data = rd01)
# The fit itself is not interpreted; it only supplies the model object that
# vif() needs.
print(mc.glm)
# Check for multicollinearity among covariates through the variance
# inflation factor (VIF)
?vif
mc.vif <- vif(mc.glm)
# VIF reflects how much of the variation in each covariate is explained by the
# other covariates. VIF is always >= 1, so the usual rules of thumb are:
#   VIF = 1  : no collinearity
#   VIF > 5  : evidence of multicollinearity
#   VIF > 10 : serious multicollinearity
# NOTE(review): this model has a single covariate (treat), so the values only
# describe the dummy columns of that one factor; the check becomes informative
# once further covariates are added to the formula.
print(mc.vif)
## Adding covariates to the Cox model
# Fit a Cox model with treatment as the only covariate
cox.mod <- coxph(
  formula = Surv(day, status) ~ treat,
  data = rd01
)
print(summary(cox.mod))
# Interpretation: http://www.sthda.com/english/wiki/cox-proportional-hazards-model
# Re-check the PH assumption on the fitted Cox model
dc.ph <- cox.zph(fit = cox.mod)
dc.ph
# If the global and the individual p-values are > 0.05, the PH assumption
# is plausible.
ggcoxzph(dc.ph)
## Including random effects
?frailty
# Cox model with the plate variable entered as a frailty term
cox.fra <- coxph(
  formula = Surv(day, status) ~ treat + frailty(plate),
  data = rd01
)
# If the global and the individual p-values are < 0.05, keep the frailty term
# while adding further covariates to the Cox model one at a time.
print(summary(cox.fra))
脚本2——与脚本1相同,但使用另一个数据集,其中对照处理treat1没有发生任何事件!
# Load the raw seed-germination (survival) dataset for the second analysis.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
rd01 <- read.table("sa_hal01.txt", header = TRUE)
rd01
str(rd01)
# Convert the integer-coded grouping columns to factors so that model formulas
# treat them as categories rather than as numeric covariates.
rd01$begin <- as.factor(rd01$begin)
rd01$spp <- as.factor(rd01$spp)
rd01$cit <- as.factor(rd01$cit)
rd01$treat <- as.factor(rd01$treat)
rd01$plate <- as.factor(rd01$plate)
# Inspect the converted data set
str(rd01)
summary(rd01)
names(rd01) # column headers
### Survival analysis
# install.packages("survival")
library(survival)
library(survminer)
# Interactive help lookups for the functions used below
?survfit
?survfit.formula
?survfit.coxph
?ggsurvplot
## Fit Kaplan-Meier survivor function
# One survival curve per treatment level; `status` is the event indicator
# and `day` the observation time.
km.fit <- survfit(
  Surv(day, status) ~ treat,
  data = rd01,
  type = "kaplan-meier"
)
km.fit
print(summary(km.fit))
# Cumulative event curves (fun = "event" plots 1 - S(t)) with confidence bands.
# yscale = 100 relabels the y axis in percent so the ticks agree with the
# "[%]" unit in the axis label (the raw curve is a 0-1 proportion).
plot(
  km.fit,
  conf.int = TRUE,
  fun = "event",
  yscale = 100,
  mark.time = c(140),
  pch = c("S", "W", "A"),
  col = c("darkred", "darkblue", "darkgreen"),
  lty = c("solid", "dotted", "longdash"),
  lwd = 1.5,
  xlab = "time [days]",
  ylab = "germination probability [%]"
)
## Comparison of Survivor Functions
# Log-rank tests
?survdiff
# rho = 0: log-rank (Mantel-Haenszel) test
# rho = 1: Peto & Peto modification of the Gehan-Wilcoxon test
# ... Global test of heterogeneity across all treatment groups
lrmh.123 <- survdiff(Surv(day, status) ~ treat, data = rd01, rho = 0)
# p < 0.05 means at least one group differs from the others; it does NOT
# mean that every group differs from every other group.
print(lrmh.123)
# ... Pairwise comparisons: each call drops one treatment level.
# These p-values are NOT adjusted for multiple testing.
lrmh.120 <- survdiff(
  Surv(day, status) ~ treat,
  data = rd01, subset = treat != 3, rho = 0
)
lrmh.103 <- survdiff(
  Surv(day, status) ~ treat,
  data = rd01, subset = treat != 2, rho = 0
)
lrmh.023 <- survdiff(
  Surv(day, status) ~ treat,
  data = rd01, subset = treat != 1, rho = 0
)
print(lrmh.120)
print(lrmh.103)
print(lrmh.023) # p < 0.05: the two remaining groups differ (unadjusted)
# ... The same pairwise log-rank comparisons with p-values adjusted for
# multiple testing (Benjamini-Hochberg); prefer these for reporting.
lrmh.pairwise <- pairwise_survdiff(
  Surv(day, status) ~ treat,
  data = rd01, p.adjust.method = "BH", rho = 0
)
print(lrmh.pairwise)
## Checking Proportional Hazard (PH) assumption
# mlogmlog(): returns -log(-log(y)) for a numeric input y; used below as the
# transformation of the Kaplan-Meier survival estimates S(t).
mlogmlog <- function(y) {
  neg_log <- -log(y)
  -log(neg_log)
}
# Show the Kaplan-Meier fit whose curves are transformed below
km.fit
# Plot -log(-log(S(t))) against time on a log x axis, one curve per treatment
# NOTE(review): the accompanying text says treat1 has no events in this data
# set; if so its S(t) stays at 1 and the transform is Inf there -- confirm
# that the missing curve is expected.
plot(
  km.fit,
  fun = mlogmlog,
  log = "x",
  mark.time = c(140),
  pch = c("S", "W", "A"),
  col = c("darkred", "darkblue", "darkgreen"),
  lty = c("solid", "dotted", "longdash"),
  lwd = 1.5,
  xlab = "time [days]",
  ylab = "- log(-log(S(t)))"
)
# Reading the plot: curves that do not cross (i.e. stay roughly parallel)
# make the PH assumption plausible.
# Interpretation: http://www.sthda.com/english/wiki/cox-model-assumptions#testing-proportional-hazards-assumption
## Checking for multicollinearity
# install.packages("HH")
library(HH)
# Fit a generalized linear model predicting days from treatment
?glm
mc.glm <- glm(day ~ treat, data = rd01)
# The fit itself is not interpreted; it only supplies the model object that
# vif() needs.
print(mc.glm)
# Check for multicollinearity among covariates through the variance
# inflation factor (VIF)
?vif
mc.vif <- vif(mc.glm)
# VIF reflects how much of the variation in each covariate is explained by the
# other covariates. VIF is always >= 1, so the usual rules of thumb are:
#   VIF = 1  : no collinearity
#   VIF > 5  : evidence of multicollinearity
#   VIF > 10 : serious multicollinearity
# NOTE(review): this model has a single covariate (treat), so the values only
# describe the dummy columns of that one factor; the check becomes informative
# once further covariates are added to the formula.
print(mc.vif)
## Adding covariates to the Cox model
# Fit a Cox model with treatment as the only covariate
# NOTE(review): the accompanying text says treat1 has no events in this data
# set; if so, the treatment coefficients relative to treat1 may not be finite
# and their Wald statistics are unreliable -- confirm, and rely on the
# likelihood-ratio test reported by summary() instead.
cox.mod <- coxph(
  formula = Surv(day, status) ~ treat,
  data = rd01
)
print(summary(cox.mod))
# Interpretation: http://www.sthda.com/english/wiki/cox-proportional-hazards-model
# Re-check the PH assumption on the fitted Cox model
dc.ph <- cox.zph(fit = cox.mod)
dc.ph
# If the global and the individual p-values are > 0.05, the PH assumption
# is plausible.
ggcoxzph(dc.ph)
## Including random effects
?frailty
# Cox model with the plate variable entered as a frailty term
cox.fra <- coxph(
  formula = Surv(day, status) ~ treat + frailty(plate),
  data = rd01
)
# If the global and the individual p-values are < 0.05, keep the frailty term
# while adding further covariates to the Cox model one at a time.
print(summary(cox.fra))
在两个脚本中,各处理之间似乎都存在统计学上的显著差异,并且treat3与其他组不同。在脚本1中,PH假设被违反,我不知道现在该怎么办。除此之外,脚本1中的Cox模型似乎运行正常,风险比也可以解释;但在脚本2中,我不知道如何解释或解决这个问题(对照处理treat1中没有事件)。