如何在 ggplot2 中定义 y 轴分段

时间:2017-09-28 21:12:16

标签: r ggplot2 facet ggpubr

我有一个数据框:

# Example data in long format (dput output): 20 rows = 10 samples x 2 measured
# variables. Columns: Sample (character), Genotype (factor, 2 levels),
# Tissue (factor; every row uses the level "iLN"), variable (factor; only the
# first two of its declared levels occur here) and value (numeric).
# NOTE(review): plot_it() below reads a data frame called `temp`, not `df` --
# presumably `temp` is the full data set this sample was taken from; confirm.
df <- structure(list(Sample = c("1: FL_643", "2: FL_645", "3: FL_647","4: FL_656", "5: FL_658", "6: cKO_644", "7: cKO_646", "8: cKO_654","9: cKO_655", "10: cKO_657", "1: FL_643", "2: FL_645", "3: FL_647", "4: FL_656", "5: FL_658", "6: cKO_644", "7: cKO_646", "8: cKO_654", "9: cKO_655", "10: cKO_657"), Genotype = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("miR-15/16 FL", "miR-15/16 cKO"), class = "factor"), 
Tissue = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Thymus", 
"iLN", "Spleen", "Skin", "Colon"), class = "factor"), variable = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L), .Label = c("Cells/SC/Live/CD8—,, CD4+,Freq. of Parent", 
"Cells/SC/Live/CD8—,, CD4+/Foxp3+,Freq. of Parent", "Cells/SC/Live/CD8—,, CD4+/Foxp3+,Median,<BV421-A>,CD127", 
"Cells/SC/Live/CD8—,, CD4+/Foxp3+/CD25+,Freq. of Parent", 
"Cells/SC/Live/CD8—,, CD4+/Foxp3+/CD25-,Freq. of Parent", 
"Cells/SC/Live/CD8—,, CD4+/Foxp3-,Freq. of Parent", "Cells/SC/Live/CD8—,, CD4+/Foxp3-,Median,<BV421-A>,CD127", 
"Cells/SC/Live/CD8—,, CD4+/Foxp3-/CD62L—,, CD44—,Freq. of Parent", 
"Cells/SC/Live/CD8—,, CD4+/Foxp3-/CD62L—,, CD44+,Freq. of Parent", 
"Cells/SC/Live/CD8—,, CD4+/Foxp3-/CD62L+,, CD44—,Freq. of Parent", 
"Cells/SC/Live/CD8—,, CD4+/Foxp3-/CD62L+,, CD44+,Freq. of Parent", 
"Cells/SC/Live/CD8—,, CD4+/Foxp3-/CD44+,Freq. of Parent", 
"Cells/SC/Live/CD8+,, CD4—,Freq. of Parent", "Cells/SC/Live/CD8+,, CD4—,Median,<BV421-A>,CD127", 
"Cells/SC/Live/CD8+,, CD4—/CD62L—,, CD44—,Freq. of Parent", 
"Cells/SC/Live/CD8+,, CD4—/CD62L—,, CD44+,Freq. of Parent", 
"Cells/SC/Live/CD8+,, CD4—/CD62L+,, CD44—,Freq. of Parent", 
"Cells/SC/Live/CD8+,, CD4—/CD62L+,, CD44+,Freq. of Parent", 
"Cells/SC/Live/CD8+,, CD4—/CD62L+,, CD44+,Freq. of Parent_1", 
"Cells/SC/Live,Count", "Cells/SC/Live/CD8—,, CD4+,Count", 
"Cells/SC/Live/CD8—,, CD4+/Foxp3+,Count", "Cells/SC/Live/CD8—,, CD4+/Foxp3+/CD25+,Count", 
"Cells/SC/Live/CD8—,, CD4+/Foxp3+/CD25-,Count", "Cells/SC/Live/CD8—,, CD4+/Foxp3-,Count", 
"Cells/SC/Live/CD8—,, CD4+/Foxp3-/CD62L—,, CD44—,Count", 
"Cells/SC/Live/CD8—,, CD4+/Foxp3-/CD62L—,, CD44+,Count", 
"Cells/SC/Live/CD8—,, CD4+/Foxp3-/CD62L+,, CD44—,Count", 
"Cells/SC/Live/CD8—,, CD4+/Foxp3-/CD62L+,, CD44+,Count", 
"Cells/SC/Live/CD8+,, CD4—,Count", "Cells/SC/Live/CD8+,, CD4—/CD62L—,, CD44—,Count", 
"Cells/SC/Live/CD8+,, CD4—/CD62L—,, CD44+,Count", "Cells/SC/Live/CD8+,, CD4—/CD62L+,, CD44—,Count", 
"Cells/SC/Live/CD8+,, CD4—/CD62L+,, CD44+,Count"), class = "factor"), 
value = c(41.2, 35.5, 39.5, 33.2, 39.1, 35.5, 35.7, 33.9, 
39.7, 42.4, 10.9, 12.1, 10.9, 12.5, 12.3, 12.8, 14.1, 15.8, 
14.6, 12.5)), .Names = c("Sample", "Genotype", "Tissue", "variable", "value"), row.names = c(NA, -20L), class = "data.frame")

并使用以下函数绘制数据的各种组合

library(ggplot2)
library(ggpubr)
#' Box-and-point plot of `value` per `variable`, split by `Genotype`, with a
#' significance label and a bracket drawn above each variable.
#'
#' @param Tissue Kept for backward compatibility; it is not used. Rows are
#'   chosen with `row_add`, and the x-axis title is taken from the `Tissue`
#'   column of the selected rows.
#' @param row_add Row positions of `data` to plot. Defaults to all rows.
#' @param y.lab Y-axis title; wrapped at 40 characters.
#' @param font_choice Font family for plot text and the significance labels.
#' @param font_size Base text size passed to `theme()`.
#' @param stat_test Test name forwarded to `stat_compare_means(method = )`.
#' @param p_display Label type forwarded to `stat_compare_means(label = )`.
#' @param legend_position Legend position passed to `theme()`.
#' @param data Data frame with columns `Genotype`, `Tissue`, `variable` and
#'   `value`. Defaults to an object named `temp` visible from where this
#'   function is defined, which is what the function read before this
#'   argument existed.
#' @return A ggplot object.
plot_it <- function(Tissue,
                    row_add = seq_len(nrow(data)),
                    y.lab = "Did you forget to add a label?",
                    font_choice = "Helvetica",
                    font_size = 12,
                    stat_test = "t.test",
                    p_display = "p.signif",
                    legend_position = c("right"),
                    data = temp) {

  # Work on a copy without row names, then keep only the requested rows and
  # drop factor levels that no longer occur.
  rownames(data) <- NULL
  df <- droplevels(data[row_add, , drop = FALSE])
  rownames(df) <- NULL
  if (nrow(df) == 0L) {
    stop("`row_add` selected no rows to plot.", call. = FALSE)
  }

  # Colour and shape per genotype, in order of first appearance.
  genotypes <- as.character(unique(df$Genotype))
  if (length(genotypes) > 2L) {
    stop("plot_it() supports at most two genotypes, found ",
         length(genotypes), ".", call. = FALSE)
  }
  color_groups <- stats::setNames(
    c("black", "red")[seq_along(genotypes)], genotypes
  )
  shape_groups <- stats::setNames(c(16, 1)[seq_along(genotypes)], genotypes)

  # Highest value per variable: reference height for the p-value labels and
  # the brackets. dplyr is namespace-qualified because the file does not
  # attach it before this function.
  dmax <- dplyr::summarise(
    dplyr::group_by(df, variable),
    value = max(value, na.rm = TRUE)
  )

  # Bracket geometry
  e <- max(dmax$value) * 0.1 # label offset: 10% of the tallest variable
  r <- 0.6                   # bracket sits at this fraction of `e`
  w <- 0.19                  # bracket half-width in x-axis units
  bcol <- "black"

  y_label <- stringr::str_wrap(y.lab, width = 40)
  # One title string instead of the whole Tissue column (one entry per row).
  x_label <- paste(unique(as.character(df$Tissue)), collapse = ", ")

  ggplot(df, aes(x = variable, y = value, color = Genotype, shape = Genotype)) +
    # outlier.shape = NA hides the boxplot's own outlier marks; every
    # observation is already drawn by geom_point() below.
    geom_boxplot(position = position_dodge(width = 0.75), outlier.shape = NA) +
    geom_point(position = position_dodge(width = 0.75), size = 2) +
    ylim(0, 1.2 * max(df$value, na.rm = TRUE)) +
    ylab(y_label) +
    xlab(x_label) +
    scale_color_manual(values = color_groups) +
    scale_shape_manual(values = shape_groups) +
    scale_x_discrete(labels = function(x) stringr::str_wrap(x, width = 20)) +
    theme_bw() +
    theme(
      panel.border = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      axis.line = element_line(colour = "black"),
      aspect.ratio = 1,
      text = element_text(family = font_choice, size = font_size),
      legend.position = legend_position
    ) +
    stat_compare_means(
      show.legend = FALSE, label = p_display, method = stat_test,
      label.y = e + dmax$value, family = font_choice
    ) +
    geom_segment(
      data = dmax,
      aes(
        x = as.numeric(variable) - w, xend = as.numeric(variable) + w,
        y = value + r * e, yend = value + r * e
      ),
      size = 0.3, color = bcol, inherit.aes = FALSE
    )
}

通过使用以下调用函数来制作图:

# Plot rows 1-30 and 141-150 of the data
plot_it(Tissue = "Thymus", row_add = c(1:30, 141:150))

这会生成下面这张图:

enter image description here

我想让函数创建一个分面(facet wrap),实际上是把 y 轴拆成两段,使每一段都有各自合适的刻度范围,从而更好地展示数据。目前,当我用取值相差悬殊的变量作图时,同一个 y 轴刻度无法同时适合所有这些变量。

如果无法做到这一点,那么是否有一种简单的方法,可以针对每张图手动引入分面(facet wrap)来断开 y 轴?

1 个答案:

答案 0 :(得分:2)

您可以使用聚类(clustering)把均值相近的组放在一起。但要注意,由于各分面的刻度不同,图表可能会产生误导。

在下面的示例中,我使用了假数据,因为样本数据只有两个组。

library(tidyverse)

# Simulated data: five groups (A-E) of 20 observations each, every group split
# into two sub-groups (a, b) of 10; values are normal draws with sd 5 around a
# group-specific mean.
set.seed(2)
dat <- data.frame(
  group = rep(LETTERS[1:5], each = 20),
  sub = rep(letters[1:2], each = 10, times = 5),
  value = rnorm(100, mean = rep(c(20, 17, 27, 56, 80), each = 20), sd = 5)
)

# Attach each group's mean to its rows, then split the rows into two facet
# groups by running k-means (2 clusters) on that per-row mean.
dat <- dat %>%
  group_by(group) %>%
  mutate(mean = mean(value)) %>%
  ungroup()
dat <- mutate(dat, facet_group = kmeans(mean, centers = 2)$cluster)

# One panel per facet group, each with its own axis scales; strips are hidden
# so the two panels read as one broken-axis figure.
ggplot(data = dat, mapping = aes(x = group, y = value, colour = sub)) +
  geom_boxplot() +
  facet_wrap(~facet_group, ncol = 2, scales = "free") +
  expand_limits(y = 0) +
  theme_classic() +
  theme(
    strip.text = element_blank(),
    strip.background = element_blank()
  )

enter image description here

或更复杂的布局选项:

library(gridExtra)

# Build one boxplot per facet group (in order of first appearance in `dat`)
# and collect them in a list.
plots <- lapply(
  unique(dat$facet_group),
  function(cluster_id) {
    cluster_rows <- dat[dat$facet_group == cluster_id, ]
    ggplot(cluster_rows, aes(x = group, y = value, colour = sub)) +
      geom_boxplot() +
      expand_limits(y = 0) +
      labs(y = "", x = "") +
      theme_bw() +
      theme(
        strip.background = element_blank(),
        strip.text = element_blank()
      )
  }
)

#' Extract the legend grob from a ggplot.
#'
#' Builds the plot's gtable and returns the grob whose name is "guide-box".
#'
#' @param a.gplot A ggplot object.
#' @return The first grob named "guide-box" in the plot's gtable.
#' @details Stops with an explicit error when the plot has no such grob
#'   (for example when its legend is suppressed).
g_legend <- function(a.gplot) {
  plot_table <- ggplot_gtable(ggplot_build(a.gplot))

  # vapply keeps the result a character vector even when a grob has no name.
  grob_names <- vapply(
    plot_table$grobs,
    function(grob) {
      grob_name <- grob$name
      if (is.null(grob_name)) NA_character_ else as.character(grob_name)[1]
    },
    character(1)
  )

  legend_index <- which(grob_names == "guide-box")
  if (length(legend_index) == 0L) {
    stop("No 'guide-box' grob found: the plot has no legend to extract.",
         call. = FALSE)
  }

  # Several matches would make `[[` index recursively, so take the first.
  plot_table$grobs[[legend_index[1]]]
}

# Take the legend from the first plot; it is shared by all panels.
leg <- g_legend(plots[[1]])

# Lay out the plots and the legend: panels side by side with their own
# legends removed, and the shared legend in a narrow column on the right.
# guides(colour = "none") replaces the deprecated guides(colour = FALSE).
grid.arrange(
  arrangeGrob(
    grobs = lapply(plots, function(p) p + guides(colour = "none")),
    # presumably matches the number of groups in each panel (3 and 2)
    widths = c(3, 2)
  ),
  leg,
  widths = c(10, 1), left = "Value", bottom = "Group"
)

enter image description here