每组具有多个或不同图例的堆叠条形图

时间:2020-06-05 04:28:29

标签: ggplot2 legend ggnewscale

是否可以使用ggplot生成以下链接中所示的barplot?

https://photos.app.goo.gl/E3MC461dKaTZfHza9

这是我所做的

library(ggplot2)

df <- read.csv(text=
"trt,gene,freq,cols
M6,ALDH16A1,100.0000000,red
M6,Others,0.0000000,lightgrey
M12,ALDH16A1,64.6638015,red
M12,GBE1,2.0074865,#4C00FF
M12,ZNF598,1.5832525,#004CFF
M12,CHMP6,1.3503397,#00E5FF
M12,C20orf27,1.2033828,#00FF4D
M12,NEGR1,0.9676972,#4DFF00
M12,TNFAIP6,0.9122418,#E6FF00
M12,ZSCAN25,0.7375572,#FFFF00
M12,BCL2,0.6848745,#FFDE59
M12,CBL,0.6765562,#FFE0B3
M12,Others,25.2128102,lightgrey
M18,ALDH16A1,42.4503581,red
M18,ATF2,2.2360682,#4C00FF
M18,DIAPH1,1.5256507,#004CFF
M18,SESTD1,1.2053805,#00E5FF
M18,TFCP2,1.1587958,#00FF4D
M18,SCAPER,1.1180341,#4DFF00
M18,CUX1,1.0306877,#E6FF00
M18,TEX10,0.9841030,#FFFF00
M18,C6orf89,0.9666337,#FFDE59
M18,PTTG1IP,0.9258720,#FFE0B3
M18,Others,46.3984161,lightgrey")

# Convert both grouping columns to factors whose levels follow the order of
# first appearance in the data.
df[c("trt", "gene")] <- lapply(
  df[c("trt", "gene")],
  function(v) factor(v, levels = unique(as.character(v)))
)

# One stacked bar per treatment; all genes share a single fill legend.
ggplot(data = df, mapping = aes(x = trt, y = freq, fill = gene)) +
  geom_bar(stat = "identity", width = 0.5, color = "black") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 4)
  )

df$cols 是我想用来标记 M6、M12、M18 中不同基因的颜色。如图所示,df$gene 中的“Others”在 M6、M12、M18 的每个条形中都应始终位于底部。

谢谢

爱明

我修改了您的代码,以便为另一个数据集生成类似的图,但遇到了一些新问题,例如:

library(dplyr)
library(tidyverse)
library(ggnewscale)

df <- read.csv(text='"trt","gene","freq","cols"
                 "100.0.250ng_CellLine_0","ALDH16A1",100,"red"
                 "100.0.250ng_CellLine_0","Others",0,"lightgrey"
                 "75.25.250ng_CellLine_0","ALDH16A1",64.6638014695688,"red"
                 "75.25.250ng_CellLine_0","GBE1",2.0074864827395,"#4C00FF"
                 "75.25.250ng_CellLine_0","ZNF598",1.5832524608346,"#004CFF"
                 "75.25.250ng_CellLine_0","CHMP6",1.35033966449466,"#00E5FF"
                 "75.25.250ng_CellLine_0","C20orf27",1.2033827810897,"#00FF4D"
                 "75.25.250ng_CellLine_0","NEGR1",0.967697213364758,"#4DFF00"
                 "75.25.250ng_CellLine_0","TNFAIP6",0.912241785664772,"#E6FF00"
                 "75.25.250ng_CellLine_0","ZSCAN25",0.737557188409816,"#FFFF00"
                 "75.25.250ng_CellLine_0","BCL2",0.684874532094829,"#FFDE59"
                 "75.25.250ng_CellLine_0","CBL",0.676556217939831,"#FFE0B3"
                 "75.25.250ng_CellLine_0","Others",25.2128102037987,"lightgrey"
                 "50.50.250ng_CellLine_0","ALDH16A1",42.4503581203051,"red"
                 "50.50.250ng_CellLine_0","ATF2",2.23606824666628,"#4C00FF"
                 "50.50.250ng_CellLine_0","DIAPH1",1.52565073079835,"#004CFF"
                 "50.50.250ng_CellLine_0","SESTD1",1.20538053921854,"#00E5FF"
                 "50.50.250ng_CellLine_0","TFCP2",1.15879578407966,"#00FF4D"
                 "50.50.250ng_CellLine_0","SCAPER",1.11803412333314,"#4DFF00"
                 "50.50.250ng_CellLine_0","CUX1",1.03068770744774,"#E6FF00"
                 "50.50.250ng_CellLine_0","TEX10",0.984102952308857,"#FFFF00"
                 "50.50.250ng_CellLine_0","C6orf89",0.966633669131777,"#FFDE59"
                 "50.50.250ng_CellLine_0","PTTG1IP",0.925872008385256,"#FFE0B3"
                 "50.50.250ng_CellLine_0","Others",46.3984161183253,"lightgrey"
                 "10.90.250ng_CellLine_0","ALDH16A1",4.68952007835455,"red"
                 "10.90.250ng_CellLine_0","STK11",1.93143976493634,"#4C00FF"
                 "10.90.250ng_CellLine_0","ERGIC2",1.46523016650343,"#004CFF"
                 "10.90.250ng_CellLine_0","EFR3A",1.1126346718903,"#00E5FF"
                 "10.90.250ng_CellLine_0","TMEM235",1.03819784524976,"#00FF4D"
                 "10.90.250ng_CellLine_0","NGLY1",1.01469147894221,"#4DFF00"
                 "10.90.250ng_CellLine_0","CNOT10",0.991185112634672,"#E6FF00"
                 "10.90.250ng_CellLine_0","NPLOC4",0.983349657198825,"#FFFF00"
                 "10.90.250ng_CellLine_0","GZMB",0.928501469147894,"#FFDE59"
                 "10.90.250ng_CellLine_0","KIF2C",0.924583741429971,"#FFE0B3"
                 "10.90.250ng_CellLine_0","Others",84.9206660137121,"lightgrey"
                 "1.99.250ng_CellLine_0","DNAH1",2.36284289276808,"red"
                 "1.99.250ng_CellLine_0","ALOX5AP",2.29426433915212,"#4C00FF"
                 "1.99.250ng_CellLine_0","SEPT7",1.78304239401496,"#004CFF"
                 "1.99.250ng_CellLine_0","TCF20",1.35910224438903,"#00E5FF"
                 "1.99.250ng_CellLine_0","USP32",1.27805486284289,"#00FF4D"
                 "1.99.250ng_CellLine_0","MUS81",1.24688279301746,"#4DFF00"
                 "1.99.250ng_CellLine_0","CEP44",1.22817955112219,"#E6FF00"
                 "1.99.250ng_CellLine_0","TMEM164",1.20324189526185,"#FFFF00"
                 "1.99.250ng_CellLine_0","RAP1B",1.18453865336658,"#FFDE59"
                 "1.99.250ng_CellLine_0","GSN",1.14713216957606,"#FFE0B3"
                 "1.99.250ng_CellLine_0","Others",84.9127182044888,"lightgrey"
                 "0.100.250ng_CellLine_0","RTN3",2.3050199437531,"red"
                 "0.100.250ng_CellLine_0","CHTF18",1.67637814091135,"#4C00FF"
                 "0.100.250ng_CellLine_0","RNPS1",1.41168685550429,"#004CFF"
                 "0.100.250ng_CellLine_0","RBKS",1.05325073984891,"#00E5FF"
                 "0.100.250ng_CellLine_0","ZNF805",0.987077918497142,"#00FF4D"
                 "0.100.250ng_CellLine_0","TMBIM6",0.865761079352242,"#4DFF00"
                 "0.100.250ng_CellLine_0","RP3-449O17.1",0.841865338308549,"#E6FF00"
                 "0.100.250ng_CellLine_0","RNASEH2A",0.814293329411981,"#FFFF00"
                 "0.100.250ng_CellLine_0","FAM46A",0.810617061559105,"#FFDE59"
                 "0.100.250ng_CellLine_0","CYB561A3",0.79775012407404,"#FFE0B3"
                 "0.100.250ng_CellLine_0","Others",88.4362994687793,"lightgrey"
                 "100.0.500ng_CellLine_0","ALDH16A1",100,"red"
                 "100.0.500ng_CellLine_0","Others",0,"lightgrey"
                 "75.25.500ng_CellLine_0","ALDH16A1",64.6680558047111,"red"
                 "75.25.500ng_CellLine_0","STX18",0.76034608856445,"#4C00FF"
                 "75.25.500ng_CellLine_0","BCL7A",0.685829412008224,"#004CFF"
                 "75.25.500ng_CellLine_0","PTPRC",0.634771689182662,"#00E5FF"
                 "75.25.500ng_CellLine_0","GABRB1",0.626492058454193,"#00FF4D"
                 "75.25.500ng_CellLine_0","EDNRB",0.59751335090455,"#4DFF00"
                 "75.25.500ng_CellLine_0","TBC1D10C",0.538175997350518,"#E6FF00"
                 "75.25.500ng_CellLine_0","SRGAP2B",0.534036181986283,"#FFFF00"
                 "75.25.500ng_CellLine_0","RABGAP1",0.527136489712559,"#FFDE59"
                 "75.25.500ng_CellLine_0","CD44",0.485738336070211,"#FFE0B3"
                 "75.25.500ng_CellLine_0","Others",29.9419045910552,"lightgrey"
                 "50.50.500ng_CellLine_0","ALDH16A1",40.5808575357307,"red"
                 "50.50.500ng_CellLine_0","TNPO1",0.979207466977791,"#4C00FF"
                 "50.50.500ng_CellLine_0","RNA5SP443",0.93337222384266,"#004CFF"
                 "50.50.500ng_CellLine_0","MND1",0.912538022417601,"#00E5FF"
                 "50.50.500ng_CellLine_0","RB1",0.900037501562565,"#00FF4D"
                 "50.50.500ng_CellLine_0","PTPRA",0.791699654152256,"#4DFF00"
                 "50.50.500ng_CellLine_0","SUCNR1",0.783365973582233,"#E6FF00"
                 "50.50.500ng_CellLine_0","MIR1284",0.625026042751781,"#FFFF00"
                 "50.50.500ng_CellLine_0","RWDD1",0.587524480186674,"#FFDE59"
                 "50.50.500ng_CellLine_0","NTN1",0.575023959331639,"#FFE0B3"
                 "50.50.500ng_CellLine_0","Others",52.3313471394641,"lightgrey"
                 "10.90.500ng_CellLine_0","ALDH16A1",7.05601485476812,"red"
                 "10.90.500ng_CellLine_0","ENTPD5",1.4722136257129,"#4C00FF"
                 "10.90.500ng_CellLine_0","MFSD10",1.28210796233255,"#004CFF"
                 "10.90.500ng_CellLine_0","LENG8-AS1",0.915159821389098,"#00E5FF"
                 "10.90.500ng_CellLine_0","FRMD4B",0.884212387815553,"#00FF4D"
                 "10.90.500ng_CellLine_0","TWISTNB",0.853264954242009,"#4DFF00"
                 "10.90.500ng_CellLine_0","ZNF544",0.778106901277687,"#E6FF00"
                 "10.90.500ng_CellLine_0","NUDCD1",0.738317343825987,"#FFFF00"
                 "10.90.500ng_CellLine_0","PHF20",0.720633096069676,"#FFDE59"
                 "10.90.500ng_CellLine_0","HNRNPK",0.702948848313365,"#FFE0B3"
                 "10.90.500ng_CellLine_0","Others",84.5970202042531,"lightgrey"
                 "1.99.500ng_CellLine_0","SND1",2.97318305479984,"red"
                 "1.99.500ng_CellLine_0","ATF1",2.18940277237984,"#4C00FF"
                 "1.99.500ng_CellLine_0","CARM1",1.96916699054282,"#004CFF"
                 "1.99.500ng_CellLine_0","OR4K15",1.28902707604612,"#00E5FF"
                 "1.99.500ng_CellLine_0","MTMR3",1.26311698406529,"#00FF4D"
                 "1.99.500ng_CellLine_0","CDK13",1.13356652416116,"#4DFF00"
                 "1.99.500ng_CellLine_0","RNU6-385P",1.0752688172043,"#E6FF00"
                 "1.99.500ng_CellLine_0","SLC4A2",0.809690374400829,"#FFFF00"
                 "1.99.500ng_CellLine_0","TMF1",0.770825236429589,"#FFDE59"
                 "1.99.500ng_CellLine_0","MAN1A1",0.738437621453556,"#FFE0B3"
                 "1.99.500ng_CellLine_0","Others",85.7883145485167,"lightgrey"
                 "0.100.500ng_CellLine_0","ALYREF",1.53269861089433,"red"
                 "0.100.500ng_CellLine_0","HCG18",1.51084751053535,"#4C00FF"
                 "0.100.500ng_CellLine_0","RNU7-146P",0.922428593725613,"#004CFF"
                 "0.100.500ng_CellLine_0","ST3GAL3",0.884969564538786,"#00E5FF"
                 "0.100.500ng_CellLine_0","HSF1",0.811612299047916,"#00FF4D"
                 "0.100.500ng_CellLine_0","HP1BP3",0.792882784454503,"#4DFF00"
                 "0.100.500ng_CellLine_0","DAOA",0.736694240674262,"#E6FF00"
                 "0.100.500ng_CellLine_0","CDK13",0.689870454190729,"#FFFF00"
                 "0.100.500ng_CellLine_0","PDXDC1",0.680505696894022,"#FFDE59"
                 "0.100.500ng_CellLine_0","CKAP5",0.647729046355549,"#FFE0B3"
                 "0.100.500ng_CellLine_0","Others",90.7897611986889,"lightgrey"'
                 ,sep=",",header=T)

# Gene levels in order of first appearance, with the catch-all "Others" moved
# to the end.
g <- unique(as.character(df$gene))
i <- which(g == "Others")
# Guard: when "Others" is absent, `i` is integer(0) and g[-integer(0)] selects
# nothing, which would silently discard every gene level.
if (length(i) > 0) {
  g <- c(g[-i], g[i])
}

# Treatments keep the order in which they first appear in the data.
df$trt <- factor(df$trt, levels = unique(as.character(df$trt)))
df$gene <- factor(df$gene, levels = g)

# Named vector gene -> colour built from the distinct (gene, cols) pairs.
# NOTE(review): a gene listed with different colours in different treatments
# (CDK13 in this data) ends up with two entries under the same name.
cols <- df %>%
  dplyr::select(gene, cols) %>%
  distinct() %>%
  deframe()

tr <- levels(df$trt)

# Stacked bar chart with one independent fill scale (and therefore one legend)
# per treatment. Each treatment in `tr` gets its own geom_bar layer drawn from
# the rows of `df` for that treatment, followed by its own scale_fill_manual
# whose legend is titled with the treatment name; new_scale_fill() separates
# consecutive fill scales. The same layer / scale / new_scale_fill pattern is
# written out by hand for tr[1] through tr[12].
p <- ggplot() + 
  geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[1]), stat = "identity", color = "black") + 
  scale_fill_manual(values = cols, guide = guide_legend(title = tr[1], ncol = 1, title.position = "top")) + 
  new_scale_fill() + # Define scales before initiating a new one
  geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[2]), stat = "identity", color = "black") + 
  scale_fill_manual(values = cols, guide = guide_legend(title = tr[2], ncol = 1, title.position = "top")) +
  new_scale_fill() + # Define scales before initiating a new one
  geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[3]), stat = "identity", color = "black") + 
  scale_fill_manual(values = cols, guide = guide_legend(title = tr[3], ncol = 1, title.position = "top")) +
  new_scale_fill() + 
  geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[4]), stat = "identity", color = "black") + 
  scale_fill_manual(values = cols, guide = guide_legend(title = tr[4], ncol = 1, title.position = "top")) +
  new_scale_fill() + 
  geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[5]), stat = "identity", color = "black") + 
  scale_fill_manual(values = cols, guide = guide_legend(title = tr[5], ncol = 1, title.position = "top")) +
  new_scale_fill() + 
  geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[6]), stat = "identity", color = "black") + 
  scale_fill_manual(values = cols, guide = guide_legend(title = tr[6], ncol = 1, title.position = "top")) +
  new_scale_fill() + 
  geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[7]), stat = "identity", color = "black") + 
  scale_fill_manual(values = cols, guide = guide_legend(title = tr[7], ncol = 1, title.position = "top")) +
  new_scale_fill() + 
  geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[8]), stat = "identity", color = "black") + 
  scale_fill_manual(values = cols, guide = guide_legend(title = tr[8], ncol = 1, title.position = "top")) +
  new_scale_fill() + 
  geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[9]), stat = "identity", color = "black") + 
  scale_fill_manual(values = cols, guide = guide_legend(title = tr[9], ncol = 1, title.position = "top")) +
  new_scale_fill() + 
  geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[10]), stat = "identity", color = "black") + 
  scale_fill_manual(values = cols, guide = guide_legend(title = tr[10], ncol = 1, title.position = "top")) +
  new_scale_fill() + 
  geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[11]), stat = "identity", color = "black") + 
  scale_fill_manual(values = cols, guide = guide_legend(title = tr[11], ncol = 1, title.position = "top")) +
  new_scale_fill() + 
  geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[12]), stat = "identity", color = "black") + 
  scale_fill_manual(values = cols, guide = guide_legend(title = tr[12], ncol = 1, title.position = "top")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1,size = 4), legend.position = "bottom", legend.justification = 0)

# Print the assembled plot.
p

我希望堆叠的条形及其图例遵循从左到右的tr顺序,如下所示:

"100.0.250ng_CellLine_0" "75.25.250ng_CellLine_0" "50.50.250ng_CellLine_0" "10.90.250ng_CellLine_0" "1.99.250ng_CellLine_0" "0.100.250ng_CellLine_0" "100.0.500ng_CellLine_0" "75.25.500ng_CellLine_0" "50.50.500ng_CellLine_0" "10.90.500ng_CellLine_0" "1.99.500ng_CellLine_0" "0.100.500ng_CellLine_0"

但是,似乎上面的代码并没有按此顺序生成堆积条形图

此外, 对于df中的'0.100.500ng_CellLine_0',堆叠条中的基因和颜色顺序与df中的顺序不同:

                  0.100.500ng_CellLine_0       ALYREF   1.5326986       red
                  0.100.500ng_CellLine_0        HCG18   1.5108475   #4C00FF
                  0.100.500ng_CellLine_0    RNU7-146P   0.9224286   #004CFF
                  0.100.500ng_CellLine_0      ST3GAL3   0.8849696   #00E5FF
                  0.100.500ng_CellLine_0         HSF1   0.8116123   #00FF4D
                  0.100.500ng_CellLine_0       HP1BP3   0.7928828   #4DFF00
                  0.100.500ng_CellLine_0         DAOA   0.7366942   #E6FF00
                  0.100.500ng_CellLine_0        CDK13   0.6898705   #FFFF00
                  0.100.500ng_CellLine_0       PDXDC1   0.6805057   #FFDE59
                  0.100.500ng_CellLine_0        CKAP5   0.6477290   #FFE0B3
                  0.100.500ng_CellLine_0       Others  90.7897612 lightgrey

另一个问题是:

tr有12种处理方式,我必须为每种处理方式添加new_scale_fill(),所以我得到的代码很长,有可能简单地做到这一点吗?

谢谢 爱明

1 个答案:

答案 0 :(得分:1)

尝试一下。只需对因子进行重新排序,然后使用scale_fill_manual来设置填充颜色。

library(tidyverse)

# Treatments keep the order in which they first occur in the data.
df$trt <- factor(df$trt, levels = unique(as.character(df$trt)))

# Genes start in first-occurrence order; "Others" is then moved to the front
# and the level order is reversed, so "Others" ends up as the last level.
df$gene <- df$gene %>%
  factor(levels = unique(as.character(df$gene))) %>%
  forcats::fct_relevel("Others", after = 0) %>%
  forcats::fct_rev()

# Named vector gene -> fill colour, one entry per distinct (gene, cols) pair.
cols <- df %>%
  select(gene, cols) %>%
  distinct() %>%
  deframe()

# Single stacked bar chart; colours are looked up in `cols`.
p <- ggplot(data = df, mapping = aes(x = trt, y = freq, fill = gene)) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_manual(values = cols) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 4)
  )

reprex package(v0.3.0)于2020-06-05创建

编辑:可以通过 ggnewscale::new_scale_fill 为每个组生成各自独立的图例。为了让 x 轴保持正确的顺序,我使用了分面(facet)。试试这个:

library(tidyverse)
library(ggnewscale)

# Treatments keep the order in which they first occur in the data.
df$trt <- factor(df$trt, levels = unique(as.character(df$trt)))

# Genes start in first-occurrence order; "Others" is then moved to the front
# and the level order is reversed, so "Others" ends up as the last level.
df$gene <- df$gene %>%
  factor(levels = unique(as.character(df$gene))) %>%
  forcats::fct_relevel("Others", after = 0) %>%
  forcats::fct_rev()

# Named vector gene -> fill colour, one entry per distinct (gene, cols) pair.
cols <- df %>%
  select(gene, cols) %>%
  distinct() %>%
  deframe()

# One layer and one fill scale (hence one legend) per treatment. Each fill
# scale has to be added before new_scale_fill() starts the next one.
p <- ggplot() +
  # M6
  geom_bar(
    data = filter(df, trt == "M6"),
    mapping = aes(x = trt, y = freq, fill = gene),
    stat = "identity",
    color = "black"
  ) +
  scale_fill_manual(
    values = cols,
    guide = guide_legend(title = "M6", ncol = 2, title.position = "top")
  ) +
  new_scale_fill() +
  # M12
  geom_bar(
    data = filter(df, trt == "M12"),
    mapping = aes(x = trt, y = freq, fill = gene),
    stat = "identity",
    color = "black"
  ) +
  scale_fill_manual(
    values = cols,
    guide = guide_legend(title = "M12", ncol = 2, title.position = "top")
  ) +
  new_scale_fill() +
  # M18
  geom_bar(
    data = filter(df, trt == "M18"),
    mapping = aes(x = trt, y = freq, fill = gene),
    stat = "identity",
    color = "black"
  ) +
  scale_fill_manual(
    values = cols,
    guide = guide_legend(title = "M18", ncol = 2, title.position = "top")
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 4),
    legend.position = "bottom",
    legend.justification = 0
  ) +
  # Faceting by treatment puts the bars back in factor order along x.
  facet_wrap(~ trt, scales = "free_x")

p

reprex package(v0.3.0)于2020-06-05创建

编辑2

  1. 要简化代码,可以使用循环。我使用了一些辅助函数和purrr::reduce,但是一个简单的for循环也可以完成这项工作。

  2. 但是,x轴的重新排序需要一点技巧。问题在于,把数据按处理拆分之后,类别的顺序就丢失了。作为解决方案,我使用分面(facet)来恢复顺序,同时去掉了分面标题(strip text)以及各分面之间的间距。

library(dplyr)
library(tidyverse)
library(ggnewscale)

# Gene names in order of first appearance, with "Others" moved to the end.
# NOTE(review): `g` is not referenced again by the code that follows in this
# snippet; it is kept so the object stays available.
g <- unique(as.character(df$gene))
i <- which(g == "Others")
# Guard: when "Others" is absent, `i` is integer(0) and g[-integer(0)] selects
# nothing, which would silently discard every gene name.
if (length(i) > 0) {
  g <- c(g[-i], g[i])
}

# Order and trim trt: strip surrounding whitespace, then fix the levels in
# order of first appearance.
df$trt <- forcats::fct_inorder(stringr::str_trim(df$trt))
tr <- levels(df$trt)


# Named vector gene -> colour built from the distinct (gene, cols) pairs.
col_vec <- df %>%
  dplyr::select(gene, cols) %>%
  distinct() %>%
  deframe()

# Helper functions
# Build the plotting data for the x-th treatment.
#
# d: data frame with (at least) the columns `trt` and `gene`.
# x: index into the global vector `tr` of treatment levels.
#
# Returns the rows of `d` whose `trt` equals tr[x], with `gene` converted to a
# factor ordered by first appearance except that "Others" is placed last, the
# rows sorted by that factor, and a numeric `gene_order` column holding each
# row's level index.
make_df <- function(d, x) {
  # First-appearance order, then push "Others" behind all other levels.
  others_last <- function(g) {
    g <- forcats::fct_inorder(g)
    forcats::fct_relevel(g, "Others", after = length(levels(g)) - 1)
  }

  one_trt <- filter(d, trt == tr[x])
  one_trt <- mutate(one_trt, gene = others_last(gene))
  one_trt <- arrange(one_trt, gene)
  mutate(one_trt, gene_order = as.numeric(gene))
}

# geom
# Bar layer for the x-th treatment, drawn from the global list `df_list`.
# Heights come straight from `freq` (stat = "identity"); fill is mapped to gene.
help_geom <- function(x) {
  layer_data <- df_list[[x]]
  geom_bar(
    data = layer_data,
    mapping = aes(x = trt, y = freq, fill = gene),
    stat = "identity",
    color = "black"
  )
}
# scale
# Manual fill scale, with its own legend, for the x-th treatment.
#
# x: index into the global `tr` (legend title) and `df_list` (palette source);
#    it is also used as the legend's display order.
#
# The palette is built from the rows of df_list[[x]] only. A single global
# gene -> colour vector contains duplicate names when a gene is listed with
# different colours in different treatments (CDK13 in this data), and a named
# `values` vector is matched by name, so the treatment-specific colour could
# not be selected from it.
help_scale <- function(x) {
  d <- df_list[[x]]
  # as.character() keeps this working whether the columns are character or
  # factor.
  pal <- stats::setNames(as.character(d$cols), as.character(d$gene))
  scale_fill_manual(values = pal,
                    guide = guide_legend(order = x, title = tr[x], ncol = 1,
                                         title.position = "top", title.theme = element_text(size = 4)))
}
# help for the loop
# Reducer step: start a fresh fill scale on plot `p`, then add the bar layer
# and the fill scale/legend for treatment index `x`. Returns the extended plot.
help_reduce <- function(p, x) {
  p <- p + new_scale_fill()
  p <- p + help_geom(x)
  p + help_scale(x)
}

# List of df: one prepared data frame per treatment level in `tr`.
# seq_along(tr) replaces a hard-coded 1:12 so the same code works for any
# number of treatments.
df_list <- map(seq_along(tr), ~ make_df(df, .x))
# Init plot with the first treatment; it needs no new_scale_fill() before it.
p <- ggplot() + help_geom(1) + help_scale(1)
# Loop over the remaining treatments; with a single treatment the index vector
# is empty and reduce() returns the initial plot unchanged.
p <- reduce(seq_along(tr)[-1], help_reduce, .init = p)

# Add wrap and theme: one facet per treatment in a single row, with the strip
# labels removed and zero horizontal spacing between panels; legends go below
# the plot.
p +
  facet_wrap(~trt, scales = "free_x", nrow = 1) +
  theme(
    legend.position = "bottom",
    legend.justification = 0,
    legend.text = element_text(size = 6),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 4),
    strip.text = element_blank(),
    panel.spacing.x = unit(0, "pt")
  )

reprex package(v0.3.0)于2020-06-06创建