使用ggplot2定制森林图。不能有多个组,CI超过下限

时间:2015-05-26 16:50:18

标签: r plot ggplot2

我写了一个函数来从回归结果中绘制CI的森林图。

我向函数提供了一个带有预测标签($ label),估计($ coef),低和高CI($ ci.low,$ ci.high),样式($ style)的data.frame:

structure(list(label = structure(c(9L, 4L, 8L, 2L, 6L, 10L, 3L, 
7L, 1L, 5L), .Label = c("    - frattura esposta", "    - frattura esposta 2", 
"    - lembo di perone vs lembo corticoperiostale", "    - lembo di perone vs lembo corticoperiostale 2", 
"    - sesso maschile vs femminile", "    - sesso maschile vs femminile 2", 
"    - trauma bassa energia", "    - trauma bassa energia 2", 
"Tempo di guarigione 2:", "Tempo di guarigione:"), class = "factor"), 
    coef = c(NA, 0.812, 0.695, 1.4, 0.682, NA, 0.812, 0.695, 
    1.4, 0.682), ci.low = c(NA, 0.405, 0.31, 1.26, 0.0855, NA, 
    0.405, 0.31, 1.26, 0.0855), ci.high = c(NA, 1.82, 0.912, 
    2.94, 1.01, NA, 1.82, 0.912, 2.94, 1.01), style = structure(c(1L, 
    2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L), .Label = c("bold", "plain"
    ), class = "factor")), .Names = c("label", "coef", "ci.low", 
"ci.high", "style"), class = "data.frame", row.names = c(NA, 
-10L))

我希望在估算值周围显示CI,并在可能的情况下对预测变量进行分组。为了第一个目标,我翻转了轴并使用了误差条;对于后者,我在数据框中创建了具有标签而非值的行。它成功了:

enter image description here

第一个问题: 正如您所看到的,分组标签是粗体,并且没有任何数据关联。 样式(正常或粗体)在样式列中定义(我计划将其自动化)。问题是,只有当所有标签都不同时才会起作用(请注意我在第一个图表中添加了#34; 2"以使它们不同);带有重复标签的行只显示为空格:

enter image description here

我从"创伤低音能量中移除了2#"标签,它消失了。 (也是风格混乱)。

我想找到一个分组解决方案,甚至与我的实现完全不同但没有相同名称的问题。

第二个问题: 正如你在两个图像中看到的那样,较低的CI条与零点相交,即Odds Ratios(并且在我使用的数据框中给出了数字),这是不可能的。

这是我的代码:

forest.plot <- function(d, xlab = "Coefficients", ylab = "", exp = T, bars = T, lims = NULL){
    require(ggplot2)
    boundary <- 0
    text.pos <- -1.5
    if(is.null(lims)) lims <- c(min(d$ci.low, na.rm = T), max(d$ci.high, na.rm = T))
    p <- ggplot(d, aes(x=label, y=coef), environment = environment()) +
        coord_flip()

    if (exp == T){
        p <- p + scale_y_log10(labels = round)
        boundary <- 1
        if(xlab == 'Coefficients') xlab <- 'Odds Ratios'
    }

    p <- p + geom_hline(yintercept = boundary, lty=2, col = 'darkgray', lwd = 1)

    if (bars == T) {
        text.pos <- -2
        p <- p +
            geom_bar(aes(fill = coef > boundary), stat = "identity", width = .3) +
            geom_errorbar(aes(ymin = ci.low, ymax = ci.high, lwd = .5), colour = "dodgerblue4", width = 0.05)
    }
    else p <- p + geom_errorbar(aes(colour = coef > boundary, ymin = ci.low, ymax = ci.high, width = .05, lwd = .5))

    if (!is.null(d$style)) style <- d[['style']] else style <- rep('plain', nrow(d))

    p <- p + geom_point(colour = 'dodgerblue4', aes(size = 2)) +
        scale_x_discrete(limits=rev(d$label)) +
        geom_text(aes(label = coef, vjust = text.pos)) +
        theme_bw() +
        theme(axis.text.x = element_text(color = 'gray30', size = 16),
                    axis.text.y = element_text(face = rev(style), color = 'gray30', size = 14, hjust=0, angle=0),
                    axis.title.x = element_text(size = 20, color = 'gray30', vjust = 0),
                    axis.ticks = element_blank(),
                    legend.position="none",
                    panel.border = element_blank()) +
        geom_vline(xintercept = 0, lwd = 2) +
        ylab(xlab) +
        xlab(ylab)

    return(p)
}

1 个答案:

答案 0 :(得分:3)

您可以通过创建两个ggplot个对象并通过gridExtra::grid.draw将它们组合在一起来获得所需的结果。

设置

library(ggplot2)
library(gridExtra)
library(grid)

regression_results <- 
  structure(list(label = structure(c(9L, 4L, 8L, 2L, 6L, 10L, 3L, 7L, 1L, 5L), 
                                   .Label = c("    - frattura esposta", "    - frattura esposta 2", "    - lembo di perone vs lembo corticoperiostale", "    - lembo di perone vs lembo corticoperiostale 2", "    - sesso maschile vs femminile", "    - sesso maschile vs femminile 2", "    - trauma bassa energia", "    - trauma bassa energia 2", "Tempo di guarigione 2:", "Tempo di guarigione:"), 
                                   class = "factor"), 
                 coef = c(NA, 0.812, 0.695, 1.4, 0.682, NA, 0.812, 0.695, 1.4, 0.682), 
                 ci.low = c(NA, 0.405, 0.31, 1.26, 0.0855, NA, 0.405, 0.31, 1.26, 0.0855), 
                 ci.high = c(NA, 1.82, 0.912, 2.94, 1.01, NA, 1.82, 0.912, 2.94, 1.01), 
                 style = structure(c(1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L), 
                                   .Label = c("bold", "plain"), class = "factor")), 
            .Names = c("label", "coef", "ci.low", "ci.high", "style"), 
            class = "data.frame", 
            row.names = c(NA, -10L))

# Set a y-axis value for each label
regression_results$yval <- seq(nrow(regression_results), 1, by = -1)

建立森林情节

# Forest plot
forest_plot <- 
  ggplot(regression_results) + 
    theme_bw() + 
    aes(x = coef, xmin = ci.low, xmax = ci.high, y = yval) + 
    geom_point() + 
    geom_errorbarh(height = 0.2, color = 'red') + 
    geom_vline(xintercept = 1) + 
    theme(
          axis.text.y = element_blank(),
          axis.title.y = element_blank(),
          axis.ticks.y = element_blank(),
          panel.grid.major.y = element_blank(), 
          panel.grid.minor.y = element_blank(), 
          panel.border = element_blank() 
          )  +
    ylim(0, 10) + 
    xlab("Odds Ratio")

制作标签图

# labels, could be extended to show more information
table_plot <-
  ggplot(regression_results) + 
    theme_bw() + 
    aes(y = yval) + 
    geom_text(aes(label = gsub("\\s2", "", label), x = 0), hjust = 0) + 
    theme(
          axis.text = element_blank(),
          axis.title = element_blank(),
          axis.ticks = element_blank(),
          panel.grid = element_blank(),
          panel.border = element_blank() 
          ) + 
    xlim(0, 6) +
    ylim(0, 10)

制作剧情

# build the plot
png(filename = "so-example.png", width = 8, height = 6, units = "in", res = 300)

grid.draw(gridExtra:::cbind_gtable(ggplotGrob(table_plot), ggplotGrob(forest_plot), size = "last"))

dev.off()

enter image description here