来自survfit对象和textConnection的中位数

时间:2013-12-22 00:09:20

标签: r

我已经使用其他方法从survfit对象中获取中位数,即使用textConnecton,但我遇到了几个问题。

# example
library(survival)
data(cancer)

cox.ph <- coxph(Surv(time, status) ~ strata(I(age > 60)), data = cancer)
coxph.fit <- survfit(cox.ph, conf.type = 'log-log')

tmp <- tail(capture.output(print(coxph.fit)), length(unique(coxph.fit$strata)) + 1)
tmp <- read.table(z <- textConnection(tmp), header = TRUE)

给我这个错误:

read.table出错(z&lt; - textConnection(tmp),header = TRUE):   列数多于列名

以及tmp看起来像什么

> tmp
[1] "                  records n.max n.start events median 0.95LCL 0.95UCL"
[2] "I(age > 60)=FALSE      94    94      94     64    353     268     390"
[3] "I(age > 60)=TRUE      134   134     134    101    301     239     353"

所以我认为问题在于分层中的空间以及textConnection如何阅读它。另一个例子:

cox.ph <- coxph(Surv(time, status) ~ strata(sex), data = cancer)
coxph.fit <- survfit(cox.ph, conf.type = 'log-log')

tmp <- tail(capture.output(print(coxph.fit)), length(unique(coxph.fit$strata)) + 1)
tmp <- read.table(z <- textConnection(tmp), header = TRUE)
close(z)

在这里,tmp表现得像我想要的那样:

> tmp
records n.max n.start events median X0.95LCL X0.95UCL
sex=1     138   138     138    112    270      210      306
sex=2      90    90      90     53    426      345      524
> tmp$median
[1] 270 426

基本上,是否有另一种方法或方法可以告诉textConnection使用多个空格或制表符作为分隔符(如果这确实是问题)?

我需要能够使用这两种方法,即strata(sex)strata(I(...)),因为我在函数中使用它,并且用户提供survfit对象。

第二个问题是(我正在使用Rstudio(不是问题))如果我缩小控制台窗口以便将tmp分成几行输出,就像这样

> tmp
      records n.max n.start
sex=1     138   138     138
sex=2      90    90      90
      events median X0.95LCL
sex=1    112    270      210
sex=2     53    426      345
      X0.95UCL
sex=1      306
sex=2      524

这样我read.table之后的最终数据框就变成了

> tmp
X0.95UCL
sex=1      306
sex=2      524

这显然会成为一个问题:

> tmp$median
NULL

在这里,问题可能是输出被捕获,因为它将在控制台中打印出来,无论打印方式如何或控制台边距有多大,我都想要所有内容。

2 个答案:

答案 0 :(得分:1)

实际上并没有使用任何对象,而是在屏幕上绘制了打印到控制台的副作用。

简单方法:

options(survfit.rmean = "individual")
summary(coxph.fit)$table   # returns the whole table from survmeans
summary(coxph.fit)$table[ , "median"]
#I(age > 60)=FALSE  I(age > 60)=TRUE 
#              353               301 

Hitting self in head: This is now the second time I've gone through the following process needlessly. The extraction of the desired printed table is describe in ?summary.survfit 困难的方法:如果你想获得“中位数”-as-object(并打印它)你可以高举杰出print.survfit函数并修改它以打印并返回隐藏的矩阵的中间列函数survmean创建:

print.survfit.median <- 
    function (x, scale = 1, digits = max(options()$digits - 4, 3), 
             print.rmean = getOption("survfit.print.rmean"),
             rmean = getOption("survfit.rmean"), 
          ...) 
{
    if (inherits(x, "survfitms")) {
        x$surv <- 1 - x$prev
        if (is.matrix(x$surv)) 
            dimnames(x$surv) <- list(NULL, x$states)
        if (!is.null(x$lower)) {
            x$lower <- 1 - x$lower
            x$upper <- 1 - x$upper
        }
    }
    if (!is.null(cl <- x$call)) {
        cat("Call: ")
        dput(cl)
        cat("\n")
    }
    omit <- x$na.action
    if (length(omit)) 
        cat("  ", naprint(omit), "\n")
    savedig <- options(digits = digits)
    on.exit(options(savedig))
    if (!missing(print.rmean) && is.logical(print.rmean) && missing(rmean)) {
        if (print.rmean) 
            rmean <- "common"
        else rmean <- "none"
    }
    else {
        if (is.null(rmean)) {
            if (is.logical(print.rmean)) {
                if (print.rmean) 
                  rmean <- "common"
                else rmean <- "none"
            }
            else rmean <- "none"
        }
        if (is.numeric(rmean)) {
            if (is.null(x$start.time)) {
                if (rmean < min(x$time)) 
                  stop("Truncation point for the mean is < smallest survival")
            }
            else if (rmean < x$start.time) 
                stop("Truncation point for the mean is < smallest survival")
        }
        else {
            rmean <- match.arg(rmean, c("none", "common", "individual"))
            if (length(rmean) == 0) 
                stop("Invalid value for rmean option")
        }
    }
    temp <- survival:::survmean(x, scale = scale, rmean)
    print(temp$matrix[ , "median"])

}s

然后使用它:

> z <- print.survfit.median(coxph.fit)
Call: survfit(formula = cox.ph, conf.type = "log-log")

I(age > 60)=FALSE  I(age > 60)=TRUE 
              353               301 
> z
I(age > 60)=FALSE  I(age > 60)=TRUE 
              353               301 

答案 1 :(得分:0)

如果@IShouldBuyABoat不介意,我会建立他的答案并稍微调整一下。

print.survfit.select <- function (x, vars = c('records','n.max','n.start','events','median','0.95LCL','0.95UCL'), suppress = TRUE,
                                  scale = 1, digits = max(options()$digits - 4, 3), print.rmean = getOption('survfit.print.rmean'), 
                                  rmean = getOption('survfit.rmean'), ...) {

  # usage: 
  # x     survfit object
  # vars  takes c('records','n.max','n.start','events','median','0.95LCL','0.95UCL')
  # ...   see survival:::print.survfit

  if (inherits(x, 'survfitms')) {
    x$surv <- 1 - x$prev
    if (is.matrix(x$surv)) 
      dimnames(x$surv) <- list(NULL, x$states)
    if (!is.null(x$lower)) {
      x$lower <- 1 - x$lower
      x$upper <- 1 - x$upper
    }
  }
  if (!suppress) {
    if (!is.null(cl <- x$call)) {
    cat('Call: ')
    dput(cl)
    cat('\n')
    }
  }
  omit <- x$na.action
  if (length(omit)) 
    cat('  ', naprint(omit), '\n')
  savedig <- options(digits = digits)
  on.exit(options(savedig))
  if (!missing(print.rmean) && is.logical(print.rmean) && missing(rmean)) {
    if (print.rmean) 
      rmean <- 'common'
    else rmean <- 'none'
  } else {
    if (is.null(rmean)) {
      if (is.logical(print.rmean)) {
        if (print.rmean) 
          rmean <- 'common' 
        else rmean <- 'none'
      } else rmean <- 'none'
    }
    if (is.numeric(rmean)) {
      if (is.null(x$start.time)) {
        if (rmean < min(x$time)) 
          stop('Truncation point for the mean is < smallest survival')
      }
      else if (rmean < x$start.time) 
        stop('Truncation point for the mean is < smallest survival')
    } else {
      rmean <- match.arg(rmean, c('none', 'common', 'individual'))
      if (length(rmean) == 0) 
        stop('Invalid value for rmean option')
    }
  }
  temp <- survival:::survmean(x, scale = scale, rmean)

  if (is.null(x$strata)) print(temp$matrix[vars]) 
  else print(temp$matrix[ ,vars])
}

所以现在我们可以

> print.survfit.select(coxph.fit)
Call: survfit(formula = cox.ph, conf.type = "log-log")

      records n.max n.start events median 0.95LCL 0.95UCL
sex=1     138   138     138    112    270     210     306
sex=2      90    90      90     53    426     345     524
> print.survfit.select(coxph.fit, vars = c('0.95LCL','median','0.95UCL'))
Call: survfit(formula = cox.ph, conf.type = "log-log")

      0.95LCL median 0.95UCL
sex=1     210    270     306
sex=2     345    426     524

> z <- print.survfit.select(coxph.fit, vars = c('median'))
Call: survfit(formula = cox.ph, conf.type = "log-log")

sex=1 sex=2 
  270   426 
> z
sex=1 sex=2 
  270   426 

......如果其他人发现有用的话。

再次感谢!