自定义函数内的图(ggplot2)

时间:2014-01-22 16:24:12

标签: r ggplot2

我已经编写了一个函数,可以使用ggplot2根据自己的喜好生成生存图。我希望这个图可以自定义,但因为我正在创建一个包含三个图的面板,所以我不能简单地返回绘图对象来添加其他自定义。

我已经成功地使用字符串完成了我想要的东西(请参阅问题末尾的函数),但我想知道是否有更好的方法来执行此操作;使用字符串看起来很陌生。

例如,该功能目前允许我这样做:

require(survival)
fit <- survfit(Surv(time, status) ~ x, data=aml)
ggSurvGraph(fit, times=seq(0, 60, by=12), offset.scale=1, xlim=c(0, 60), 
    gg_expr="ylab('Percentage Survived') + xlab('Time Elapsed') +
             scale_y_continuous(breaks=seq(0, 1.0, by=.25), labels=seq(0, 100, by=25))")

ggexpr

添加到情节中
if (!missing(gg_expr)) .plot <- eval(parse(text=paste(".plot + ", gg_expr, sep="")))

但在我看来,在gg_expr而不是字符串中传递表达式会更自然。比如这个:

require(survival)
fit <- survfit(Surv(time, status) ~ x, data=aml)
ggSurvGraph(fit, times=seq(0, 60, by=12), offset.scale=1, xlim=c(0, 60), 
    gg_expr=ylab('Percentage Survived') + xlab('Time Elapsed') +
            scale_y_continuous(breaks=seq(0, 1.0, by=.25), labels=seq(0, 100, by=25)))

有没有更好的方法来解决这个问题? (图表的副本跟随功能代码)

ggSurvGraph <- function(object, times, cum.inc=FALSE, conf.bar=TRUE,
                        offset.scale=1, n.risk=FALSE, n.event=FALSE,
                        xlim, gg_expr){
  require(stringr)
  require(plyr)
  require(ggplot2)
  require(gridExtra)
  require(survival)

  #**************************************************************
  #*** Parameter checking
  error.count <- 0
  error.msg <- NULL

  #*** 'object' should be either a 'survfit' object or a 'data.frame'
  if (!(any(class(object) %in% c("survfit","data.frame")))){
    error.count <- error.count + 1
    error.msg <- c(error.msg, str_c(error.count, ": \'object\' must be either a survfit object or a data frame", sep=""))
  }

  #*** When 'object' is a data frame, it must have the columns in 'req.col'
  #*** This is a feature that was added so that we could make survival graphs with PROC LIFETEST output
  req.col <- c("time","surv","lower","upper","n.risk","n.event")
  if ("data.frame" %in% class(object) && !any(req.col %in% names(object))){
    miss.col <- str_c("\'", req.col[!req.col %in% names(object)], "\'", sep="", collapse=", ")
    error.count <- error.count + 1
    error.msg <- c(error.msg, str_c(error.count, ": data frame \'object\' is missing columns ", miss.col, sep=""))
  }

  #*** Stop the function if any parameter checks failed
  if (error.count){
    stop(str_c(error.msg, collapse="\n"))
  }

  #********************************************************************
  #*** Prepare the data for plotting

  #*** Create data frame from survfit object
  if ("survfit" %in% class(object)) survData <- createSurvivalFrame(object) else survData <- object
  if (is.null(survData$strata)) survData$strata <- factor(1)
  if (cum.inc) survData <- transform(survData,
                                     surv = 1-surv,
                                     lower = 1-lower,
                                     upper = 1-upper)
  survData <- ddply(survData,
                    "strata",
                    transform,
                    cum.evt = cumsum(n.event))

  #*** Generate offset values
  if(nlevels(survData$strata)>1){
    offset <- seq.int(-1*ceiling(nlevels(survData$strata)/2),ceiling(nlevels(survData$strata)/2),length.out=nlevels(survData$strata)+1)
    offset <- offset[offset!=0]
    offset <- offset[order(abs(offset))] * offset.scale
  }
  else offset <- 0
  offset <- data.frame(strata = levels(survData$strata), offset = offset)

  survData <- merge(survData, offset, by="strata")

  #*************************************************************
  #* Limit to 'times' argument

  extractSurvTimes <- function(df, reportTime=times){
    .out <- df[sapply(reportTime, function(t) max(which(df$time <= t))), ]
    .out$reportTime <- reportTime
    return(.out)
  }

  survData <- transform(survData, reportTime = time)
  survTimes <- if (missing(times)) survData
               else do.call("rbind", lapply(levels(survData$strata), function(x) extractSurvTimes(subset(survData, strata==x))))


  if (missing(xlim)) xlim <- c(0, max(survData$time, na.rm=TRUE))
  #*************************************************************
  #*** Create Plot

  #*** Creates a blank plot for a spacer between survival plot and risk/event data
  blank.pic <- ggplot(survData, aes(time, surv)) +
    geom_blank() + theme_bw() +
    theme(axis.text.x = element_blank(), axis.text.y = element_blank(),
         axis.title.x = element_blank(), axis.title.y = element_blank(),
         axis.ticks = element_blank(),
         panel.grid = element_blank(), panel.border = element_blank())


  #*** Create the survival plot
  if (nlevels(survData$strata) > 1){ 
    .plot <- ggplot(survData, aes(x=time, y=surv, colour=strata)) + geom_step() 
  }
  else{
    .plot <- ggplot(survData, aes(x=time, y=surv)) + geom_step()
  }

  .plot <- .plot + scale_x_continuous(limits = xlim) 

  #*** Add Confidence bars
  if (conf.bar){
      .plot <- .plot +  
                geom_segment(data=survTimes, aes(x=reportTime + offset, xend=reportTime + offset, y=lower, yend=upper))
  }

  if (!missing(gg_expr)) .plot <- eval(parse(text=paste(".plot + ", gg_expr, sep="")))

  riskTable <- survTimes
  riskTable <- melt(riskTable[, c("reportTime", "strata", "n.risk", "cum.evt")],
                    c("reportTime", "strata"))
  riskTable <- transform(riskTable,
                         y.pos = ifelse(variable %in% "n.risk", 1, 0))

  .risk <- ggplot(survData, aes(x=time, y=surv)) + 
    geom_text(data=riskTable, aes(x=reportTime, y=rev(variable), label=value), size=3.5, hjust=0) + 
    theme_bw() + 
#     scale_y_discrete(breaks = as.character(levels(riskTable$strata)),
#                      labels = levels(riskTable$variable)) + 
    theme(axis.text.x = element_blank(), 
          axis.title.x = element_blank(), axis.title.y = element_blank(),
          axis.ticks = element_blank(),
          panel.grid = element_blank(), panel.border = element_blank())  + 
    scale_x_continuous(limits = xlim) + 
    scale_y_discrete(labels=c("N Event", "N at Risk"))

  if (nlevels(riskTable$strata) > 1) .risk <- .risk + facet_wrap(~ strata, ncol=1)

  grid.arrange(.plot + theme(plot.margin = unit(c(1,1,0,.5), "lines"), legend.position="bottom"), 
               blank.pic + theme(plot.margin = unit(c(0,0,0,0), "lines")), 
               .risk + theme(plot.margin = unit(c(0,1,0,0), "lines")), 
               clip = FALSE, nrow = 3,
               ncol = 1, heights = unit(c(.70, .04, .35),c("null", "null", "null"))) 


}

Sample KM Plot

2 个答案:

答案 0 :(得分:2)

我不明白你的问题,但我觉得list()语法可能有帮助,

p = qplot(1:10, 1:10)

p + list(ylab("label"), 
         scale_x_continuous(), 
         geom_line())

答案 1 :(得分:0)

摘要

此答案允许您以标准ggplot形式将ggplot个对象添加到函数的返回值中。在这里,我们将coord_cartesian添加到函数的第一个图形中,该函数返回两个图形(您的图形和表格会返回一个图表,但同样的想法):

my_plots() + coord_cartesian(ylim=c(0, 5))

enter image description here

另外,请注意您可以添加任何ggplot对象。我们刚刚选择coord_cartesian因为它很方便。在您的情况下,您可以将ggSurvGraph修改为my_plots,这应该非常简单。

详细

该策略依赖于不使用grid.arrange的函数,而是依赖于函数返回的对象print方法使用grid.arrange。我编写了一个制作条形图和散点图的函数,但我认为它很好地说明了这一点。

library(gridExtra)
library(ggplot2)
my_plots <- function() {
  df <- data.frame(x=1:10, y=(1:10)/10)        
  gg1 <- ggplot(df) + geom_point(aes(x=x, y=y))
  gg2 <- ggplot(df) + geom_bar(aes(x=x, y=y), stat="identity")         
  structure(list(gg1, gg2), class="myplots")
}

这里的关键是我返回的是ggplot个对象的列表,这里有一个自定义类myplots。然后,我可以为该类定义一个print方法,让grid.arrange做到这一点:

print.myplots <- function(x, ...) {
  do.call(grid.arrange, x)
}

my_plots()通过print方法输出图表(注意这里的关键点是你有两个ggplot个对象;我意识到你的其中一个是一个表,但是净结果是相同的):

my_plots()

enter image description here

现在,我可以定义一个+方法,它将第二个操作数添加到列表中的第一个值(所以在你的情况下,这只会影响图形,而不是表格):< / p>

`+.myplots` <- function(e1, e2) {
  e1[[1]] <- e1[[1]] + e2
  invisible(e1)
}

现在,我们可以按照摘要中的说明使用(注意Y轴如何针对第一个图表进行更改):

my_plots() + coord_cartesian(ylim=c(0, 5))

enter image description here