我有以下数据框:
# Example data: exon/intron fractions per sample (`nfn`) and cell type.
# Every sample contributes an exon row followed by an intron row; the first
# three samples belong to cell type "F", the remaining seven to "V".
sample_index <- c(1:3, 1:7)
dat <- structure(
  list(
    type = rep(c("exon", "intron"), times = 10),
    nfn = sprintf("dy1_PLT4.x4_%02d", rep(sample_index, each = 2)),
    perc = c(
      0.276422764227642, 0.723577235772358,
      0.328301886792453, 0.671698113207547,
      0.387096774193548, 0.612903225806452,
      0.739130434782609, 0.260869565217391,
      0.367965367965368, 0.632034632034632,
      0.287749287749288, 0.712250712250712,
      0.373170731707317, 0.626829268292683,
      0.404320987654321, 0.595679012345679,
      0.4625, 0.5375,
      0.311418685121107, 0.688581314878893
    ),
    celltype = rep(c("F", "V"), times = c(6, 14))
  ),
  row.names = c(NA, -20L),
  class = c("tbl_df", "tbl", "data.frame")
)
dat
#> type nfn perc celltype
#> 1 exon dy1_PLT4.x4_01 0.2764228 F
#> 2 intron dy1_PLT4.x4_01 0.7235772 F
#> 3 exon dy1_PLT4.x4_02 0.3283019 F
#> 4 intron dy1_PLT4.x4_02 0.6716981 F
#> 5 exon dy1_PLT4.x4_03 0.3870968 F
#> 6 intron dy1_PLT4.x4_03 0.6129032 F
#> 7 exon dy1_PLT4.x4_01 0.7391304 V
#> 8 intron dy1_PLT4.x4_01 0.2608696 V
#> 9 exon dy1_PLT4.x4_02 0.3679654 V
#> 10 intron dy1_PLT4.x4_02 0.6320346 V
#> 11 exon dy1_PLT4.x4_03 0.2877493 V
#> 12 intron dy1_PLT4.x4_03 0.7122507 V
#> 13 exon dy1_PLT4.x4_04 0.3731707 V
#> 14 intron dy1_PLT4.x4_04 0.6268293 V
#> 15 exon dy1_PLT4.x4_05 0.4043210 V
#> 16 intron dy1_PLT4.x4_05 0.5956790 V
#> 17 exon dy1_PLT4.x4_06 0.4625000 V
#> 18 intron dy1_PLT4.x4_06 0.5375000 V
#> 19 exon dy1_PLT4.x4_07 0.3114187 V
#> 20 intron dy1_PLT4.x4_07 0.6885813 V
使用以下代码:
library(tidyverse)
# Stacked exon/intron bars per sample, one facet per cell type.
# Samples appear in the default (alphabetical) order on the x axis.
dat %>%
  ggplot(aes(nfn, perc, fill = type)) +
  geom_col() +
  scale_fill_brewer(palette = "Set2") +
  facet_wrap(~celltype, scales = "free") +
  labs(x = "") +
  theme_minimal() +
  theme(
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 5)
  )
我可以制作以下图片:
我想要做的是:在“F”和“V”两个分面中,分别按内含子百分比从小到大对 x 轴上的样本排序。我怎样才能做到这一点?
例如,“F”面板中 x 轴的顺序应为:
dy1_PLT4.x4_03, dy1_PLT4.x4_02, dy1_PLT4.x4_01
“V”面板中 x 轴的顺序应为:
dy1_PLT4.x4_01, dy1_PLT4.x4_06, dy1_PLT4.x4_05,
dy1_PLT4.x4_04, dy1_PLT4.x4_02, ... dy1_PLT4.x4_03
答案 0 :(得分:2)
这有点棘手:如果先给不同分面中的同名样本赋予不同的因子水平名称,再在绘图时把显示的标签改回原来的名称,就可以得到想要的结果。关键是确保手动提供给绘图的标签与图中各因子水平一一对应、顺序一致。我的做法如下。
# Order the x axis of each facet by increasing intron percentage.
#
# A discrete x scale has a single set of factor levels shared by all facets,
# so a sample that occurs in both cell types cannot sit at two different
# positions. The workaround is to give the F samples distinct level names
# and to map every level back to its original sample name when the axis
# labels are drawn.

# Sort by cell type (between facets), then by percentage (within each facet).
sorteddat <- dat[order(dat$celltype, dat$perc), ]

# Remember the original sample names; these are the labels to display.
plain_nfn <- as.character(sorteddat$nfn)

# Suffix the F samples so that they become levels distinct from the V samples.
is_f <- sorteddat$celltype == "F"
sorteddat$nfn <- ifelse(is_f, paste0(plain_nfn, "_F"), plain_nfn)

# Keep only introns for level ordering.
is_intron <- sorteddat$type == "intron"
reduceddat <- sorteddat[is_intron, ]

# Named lookup: plotted level -> label to display. It is built from the
# renamed intron rows, so it has exactly one entry per factor level. Taking
# unique() of the original names instead would collapse the samples shared by
# F and V and leave fewer labels than levels.
oldLabels <- setNames(plain_nfn[is_intron], reduceddat$nfn)

# Relevel the nfn variable in intron-percentage order (F levels first).
sorteddat$nfn <- factor(sorteddat$nfn, levels = unique(names(oldLabels)))

# Plot; the label function looks up each level, so labels cannot drift out of
# sync with the levels shown in a given facet.
ggplot(sorteddat, aes(x = nfn, y = perc, fill = type)) +
  geom_col() +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal() +
  theme(
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 5)
  ) +
  facet_wrap(~celltype, scales = "free_x") +
  scale_x_discrete(labels = function(lv) unname(oldLabels[lv])) +
  xlab("")
答案 1 :(得分:2)
一种选择是按 celltype 将数据框拆分成两个独立的数据框,分别绘制每个数据框,然后将两张图并排放在一起,使其看起来像分面图。这样就可以为 celltype 的每个水平单独设置因子水平。
以下是一个例子。我还改变了 x 轴的标注方式。由于各个 nfn 值除了末尾的数字标识符之外完全相同,您可以去掉重复的文本,只用数字作为刻度标签,并用 dy1_PLT4.x4 作为 x 轴标题,这样图更容易阅读。
library(tidyverse)
library(scales)
library(egg)
library(grid)
# Build one plot per cell type, stored in a named list. Separate plots let
# each panel carry its own factor level order.
# The data is sorted first and then split on the sorted frame's own celltype
# column, so the grouping vector always lines up with the rows being split
# (splitting by the unsorted dat$celltype is only correct when dat happens to
# be grouped by celltype already).
p <- dat %>%
  arrange(celltype, perc) %>%
  split(.$celltype) %>%
  map(function(d) {
    d <- d %>%
      mutate(
        # Keep only the trailing two-character sample id as the tick label.
        nfn = gsub(".*(.{2}$)", "\\1", nfn),
        # Rows are sorted by perc, so the intron rows give the level order.
        nfn = factor(nfn, levels = nfn[type == "intron"])
      )
    ggplot(d, aes(x = nfn, y = perc, fill = type)) +
      geom_col() +
      scale_fill_brewer(palette = "Set2") +
      scale_y_continuous(labels = percent) +
      theme_minimal() +
      theme(
        legend.title = element_blank(),
        axis.title.x = element_blank()
      ) +
      facet_grid(~celltype) +
      labs(y = "Percent")
  })
# Remove y labels and ticks from right plot
p[[2]] <- p[[2]] + theme(
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank(),
  axis.title.y = element_blank()
)
# Remove legend from left plot ("none" replaces the deprecated FALSE)
p[[1]] <- p[[1]] + guides(fill = "none")
# Lay out the two plots side by side; widths follow the 3 and 7 samples in
# the two panels. The shared x-axis title is the sample name with its last
# three characters removed.
ggarrange(
  plots = p, ncol = 2, widths = c(3, 7),
  bottom = textGrob(
    gsub("(.*).{3}$", "\\1", dat$nfn[1]),
    gp = gpar(fontsize = 10)
  )
)
另一种选择是仅绘制内含子百分比,因为外显子百分比总是等于 100% 减去内含子百分比。这样做的话,我们可以利用 drlib 包中的 reorder_within 和 scale_x_reordered 函数(这是一个随时可能更改的“个人”包,如果你想经常使用这些函数,最好自己保存一份副本)。这种做法还有一个优点:用更少的墨水呈现同样的数据。
#devtools::install_github("dgrtwo/drlib")
library(drlib)
# Intron percentage only, drawn as text labels; samples are reordered within
# each cell-type facet by their intron percentage.
dat %>%
  filter(type == "intron") %>%
  # Keep only the trailing two-character sample id as the tick label.
  mutate(nfn = gsub(".*(.{2}$)", "\\1", nfn)) %>%
  ggplot(aes(x = reorder_within(nfn, perc, celltype), y = perc)) +
  geom_text(aes(label = sprintf("%1.1f", perc * 100)), size = 3.5) +
  facet_grid(. ~ celltype, scales = "free_x", space = "free_x") +
  scale_x_reordered() +
  scale_y_continuous(
    limits = c(0, 1),
    labels = percent,
    expand = c(0, 0)
  ) +
  # Axis title: the sample name with its last three characters removed.
  labs(
    x = gsub("(.*).{3}$", "\\1", dat$nfn[1]),
    y = "Percent Intron"
  ) +
  theme_classic() +
  theme(panel.border = element_rect(colour = "grey50", fill = NA))