如何对在geom_bar()中使用stat = "identity"的分面(facet)ggplot按x轴进行排序

时间:2018-02-26 02:06:02

标签: r ggplot2 tidyverse

我有以下数据框:

# Example data: exon/intron fractions (`perc`) for each sample (`nfn`) in two
# cell types ("F": 3 samples, "V": 7 samples). Every sample contributes one
# "exon" row followed by one "intron" row.
dat <- structure(
  list(
    type = rep(c("exon", "intron"), times = 10),
    nfn = sprintf("dy1_PLT4.x4_%02d", rep(c(1:3, 1:7), each = 2)),
    perc = c(
      0.276422764227642, 0.723577235772358,
      0.328301886792453, 0.671698113207547,
      0.387096774193548, 0.612903225806452,
      0.739130434782609, 0.260869565217391,
      0.367965367965368, 0.632034632034632,
      0.287749287749288, 0.712250712250712,
      0.373170731707317, 0.626829268292683,
      0.404320987654321, 0.595679012345679,
      0.4625, 0.5375,
      0.311418685121107, 0.688581314878893
    ),
    celltype = rep(c("F", "V"), times = c(6, 14))
  ),
  # Compact row names for 20 rows; the class vector makes this a tibble.
  row.names = c(NA, -20L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Print the data frame; the expected console output follows as `#>` comments.
dat
#>      type            nfn      perc celltype
#> 1    exon dy1_PLT4.x4_01 0.2764228        F
#> 2  intron dy1_PLT4.x4_01 0.7235772        F
#> 3    exon dy1_PLT4.x4_02 0.3283019        F
#> 4  intron dy1_PLT4.x4_02 0.6716981        F
#> 5    exon dy1_PLT4.x4_03 0.3870968        F
#> 6  intron dy1_PLT4.x4_03 0.6129032        F
#> 7    exon dy1_PLT4.x4_01 0.7391304        V
#> 8  intron dy1_PLT4.x4_01 0.2608696        V
#> 9    exon dy1_PLT4.x4_02 0.3679654        V
#> 10 intron dy1_PLT4.x4_02 0.6320346        V
#> 11   exon dy1_PLT4.x4_03 0.2877493        V
#> 12 intron dy1_PLT4.x4_03 0.7122507        V
#> 13   exon dy1_PLT4.x4_04 0.3731707        V
#> 14 intron dy1_PLT4.x4_04 0.6268293        V
#> 15   exon dy1_PLT4.x4_05 0.4043210        V
#> 16 intron dy1_PLT4.x4_05 0.5956790        V
#> 17   exon dy1_PLT4.x4_06 0.4625000        V
#> 18 intron dy1_PLT4.x4_06 0.5375000        V
#> 19   exon dy1_PLT4.x4_07 0.3114187        V
#> 20 intron dy1_PLT4.x4_07 0.6885813        V

使用以下代码:

library(tidyverse)
# Baseline plot from the question: stacked exon/intron bars per sample, one
# facet per cell type. No explicit ordering of `nfn` is applied here.
dat %>%
  ggplot(mapping = aes(x = nfn, y = perc, fill = type)) +
  facet_wrap(~celltype, scales = "free") +
  geom_bar(stat = "identity") +
  scale_fill_brewer(palette = "Set2") +
  labs(x = "") +
  theme_minimal() +
  theme(
    legend.title = element_blank(),
    # Small, rotated tick labels so the long sample names do not overlap.
    axis.text.x = element_text(angle = 45, hjust = 1, size = 5)
  )

我可以制作以下图片:

enter image description here

我想在“F”和“V”这两个分面中,分别按内含子(intron)百分比的递增顺序对x轴进行排序。我怎样才能做到这一点?

例如,“F”面板中的x轴顺序应为:

dy1_PLT4.x4_03, dy1_PLT4.x4_02, dy1_PLT4.x4_01

“V”面板中的x轴顺序应为:

dy1_PLT4.x4_01, dy1_PLT4.x4_06, dy1_PLT4.x4_05,
dy1_PLT4.x4_04, dy1_PLT4.x4_02, ... dy1_PLT4.x4_03

2 个答案:

答案 0 :(得分:2)

这有点棘手,但如果我们给不同分面中的样本加上不同的标记,然后在显示时再换回原来的标签,就可以得到你想要的结果。诀窍是确保手动添加到图中的标签与图中显示的顺序一致。我的做法如下。

# Order the bars inside each facet by increasing intron fraction.
#
# `nfn` is a single factor with one level order, so the same sample name
# cannot be placed at different positions in the two facets. The workaround:
# make the "F" sample names unique by appending "_F", order the factor levels
# by intron fraction, and strip the suffix again when the axis is labelled.

# Sort by cell type (between facets), then by percentage (within each facet).
sorteddat <- dat[order(dat$celltype, dat$perc), ]

# Tag the "F" samples so they become distinct factor levels.
sorteddat$nfn <- as.character(sorteddat$nfn)
is_f <- sorteddat$celltype == "F"
sorteddat$nfn[is_f] <- paste0(sorteddat$nfn[is_f], "_F")

# Only the intron rows define the level order (ascending intron fraction,
# "F" samples first because the data are sorted by celltype).
reduceddat <- sorteddat[sorteddat$type == "intron", ]
sorteddat$nfn <- factor(sorteddat$nfn, levels = unique(reduceddat$nfn))

# Plot. The tick labels are produced by a function of the breaks rather than
# by a positional label vector: the de-duplicated original names (7 values)
# do not line up one-to-one with the 10 suffixed factor levels, whereas
# stripping the suffix from each break is correct for every panel.
# NOTE: this assumes no original sample name already ends in "_F".
ggplot(sorteddat, aes(x = nfn, y = perc, fill = type)) +
  geom_bar(stat = "identity") +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal() +
  theme(legend.title = element_blank()) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 5)) +
  facet_wrap(~celltype, scales = "free_x") +
  scale_x_discrete(labels = function(x) sub("_F$", "", x)) +
  xlab("")

enter image description here

答案 1 :(得分:2)

一种选择是按celltype将数据框拆分成两个独立的数据框,分别绘制每个数据框,然后将两张图拼在一起,使其看起来就像分面图一样。这样就可以为celltype的每个级别单独设置因子级别。

以下是一个例子。我也改变了x轴的标注方式。由于各个nfn值只有末尾的数字标识符不同,因此可以去掉重复的文本,只用数字作为刻度标签,并使用dy1_PLT4.x4作为x轴标题,这样图更容易阅读。

library(tidyverse)
library(scales)
library(egg)
library(grid)

# Build one plot per cell type and lay them out side by side so the result
# looks like a facetted plot, but with independent factor levels per panel.

# Sort once, then split the *sorted* frame by its own celltype column so rows
# and groups stay aligned even if `dat` is not already ordered by celltype.
dat_sorted <- dat %>% arrange(celltype, perc)

p <- map(
  split(dat_sorted, dat_sorted$celltype),
  ~ ggplot(
    .x %>%
      mutate(
        # Keep only the trailing two characters (the sample number).
        nfn = gsub(".*(.{2}$)", "\\1", nfn),
        # Rows are sorted by perc, so the intron rows give the bar order
        # (ascending intron fraction).
        nfn = factor(nfn, levels = nfn[type == "intron"])
      ),
    aes(x = nfn, y = perc, fill = type)
  ) +
    geom_col() +
    scale_fill_brewer(palette = "Set2") +
    scale_y_continuous(labels = percent) +
    theme_minimal() +
    theme(
      legend.title = element_blank(),
      axis.title.x = element_blank()
    ) +
    # Single-panel facet, used only to draw the celltype strip on top.
    facet_grid(~celltype) +
    labs(y = "Percent")
)

# Remove y labels and ticks from the right plot.
p[[2]] <- p[[2]] + theme(
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank(),
  axis.title.y = element_blank()
)

# Remove the legend from the left plot ("none" replaces the deprecated FALSE).
p[[1]] <- p[[1]] + guides(fill = "none")

# Lay out the two plots; the 3:7 widths match the 3 and 7 bars per panel.
# The shared x-axis title is the first sample name minus its last three
# characters (the "_NN" suffix).
ggarrange(
  plots = p, ncol = 2, widths = c(3, 7),
  bottom = textGrob(
    gsub("(.*).{3}$", "\\1", dat$nfn[1]),
    gp = gpar(fontsize = 10)
  )
)

enter image description here

另一种选择是仅绘制内含子百分比,因为外显子百分比总是100%减去内含子百分比。如果这样做,我们可以利用drlib包中的reorder_within和scale_x_reordered函数(这是一个随时可能更改的“个人”包,如果你想经常使用这些函数,最好自己保存一份副本)。这种做法还有一个优点:呈现数据所用的“墨水”更少。

#devtools::install_github("dgrtwo/drlib")
library(drlib)

# Intron-only view: each sample's intron percentage is printed as text at its
# y position, one facet per cell type.
# reorder_within() / scale_x_reordered() come from drlib (loaded outside this
# block); presumably they order `nfn` by `perc` within each `celltype` and
# restore clean tick labels -- confirm against the drlib source.
dat %>%
  filter(type == "intron") %>%
  mutate(nfn = gsub(".*(.{2}$)", "\\1", nfn)) %>%
  ggplot(aes(x = reorder_within(nfn, perc, celltype), y = perc)) +
  facet_grid(. ~ celltype, scales = "free_x", space = "free_x") +
  geom_text(aes(label = sprintf("%1.1f", perc * 100)), size = 3.5) +
  scale_x_reordered() +
  scale_y_continuous(labels = percent, limits = c(0, 1), expand = c(0, 0)) +
  theme_classic() +
  theme(panel.border = element_rect(colour = "grey50", fill = NA)) +
  labs(
    # Shared sample prefix: first name minus its last three characters.
    x = gsub("(.*).{3}$", "\\1", dat$nfn[1]),
    y = "Percent Intron"
  )

enter image description here