我想绘制一个包括彩色注释栏的热图。数据的背景知识。
我简化了下面的示例数据。
我有患者编号,并希望以热图的形式为每个“ emm_type”绘制每个患者的数值测量值(value_mean)。每个“ emm_type”分为一个“集群”和一个“模式”。因此,我希望热图包括一个彩色面板,该面板描绘了与它们各自的emm_type对齐的这些变量。
这是我的数据样本
> dput(example)
structure(list(id = c("RF0475", "RF0504", "RF0475", "RF0504",
"RF0475", "RF0504", "RF0475", "RF0504", "RF0475", "RF0504", "RF0475",
"RF0475", "RF0504", "RF0504", "RF0475", "RF0504", "RF0475", "RF0475",
"RF0475", "RF0475", "RF0475", "RF0475", "RF0504", "RF0504", "RF0504",
"RF0504", "RF0504", "RF0504", "RF0475", "RF0504", "RF0475", "RF0475",
"RF0504", "RF0504", "RF0475", "RF0475", "RF0475", "RF0475", "RF0475",
"RF0504", "RF0504", "RF0504", "RF0504", "RF0504", "RF0475", "RF0475",
"RF0475", "RF0475", "RF0475", "RF0504", "RF0504", "RF0504", "RF0504",
"RF0504", "RF0475", "RF0475", "RF0475", "RF0475", "RF0504", "RF0504",
"RF0504", "RF0504", "RF0475", "RF0504", "RF0475", "RF0504", "RF0475",
"RF0504", "RF0475", "RF0504", "RF0475", "RF0504", "RF0475", "RF0504"
), cluster = c("a-c2", "a-c2", "a-c3", "a-c3", "a-c4", "a-c4",
"a-c5", "a-c5", "d1", "d1", "d2", "d2", "d2", "d2", "d3", "d3",
"d4", "d4", "d4", "d4", "d4", "d4", "d4", "d4", "d4", "d4", "d4",
"d4", "e1", "e1", "e2", "e2", "e2", "e2", "e3", "e3", "e3", "e3",
"e3", "e3", "e3", "e3", "e3", "e3", "e4", "e4", "e4", "e4", "e4",
"e4", "e4", "e4", "e4", "e4", "e6", "e6", "e6", "e6", "e6", "e6",
"e6", "e6", "m19", "m19", "m218", "m218", "m233", "m233", "m6",
"m6", "m74", "m74", "m95", "m95"), pattern = c("a-c", "a-c",
"a-c", "a-c", "a-c", "a-c", "a-c", "a-c", "d", "d/a-c", "d",
"e", "d", "e", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d",
"d", "d", "d", "d", "e", "e", "e", "e", "e", "e", "e", "e", "e",
"e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e", "e",
"e", "e", "e", "e", "e", "d", "e", "d", "e", "d", "e", "d", "a-c",
"a-c", "d/a-c", "d/a-c", "a-c", "a-c", "a-c", "a-c", "d", "d",
"d", "d"), value_mean = c(1.82898259773807, 2.74970378862732,
2.31836858483114, 1.76297558336274, 6.99379366342489, 2.15775104765085,
9.81401417902465, 5.94493622813449, 6.42938334280903, 4.93258400244736,
4.42293379133012, 35.7119300124525, 85.8843942732351, 6.11004188703959,
4.46626647704635, 5.06748534630747, 2.34493589810343, 3.67864160152857,
3.49413303648271, 4.54325723822265, 11.6241914407818, 6.52797483395025,
2.29277958694861, 7.80004526681732, 2.69910122940354, 3.51802243804242,
6.70909678383865, 4.99681912787639, 5.54367727879201, 9.26383310897086,
4.57249586682161, 4.47787503848692, 12.3177425173967, 15.4240417229311,
4.14187570530094, 32.2447795214283, 2.8171424279428, 3.62644580807153,
79.8173447817745, 2.86868514917333, 4.13675844930625, 2.89891922608397,
120, 5.07500759868863, 3.31961544500323, 9.76557528920087, 4.93060063573198,
4.65192299498109, 66.3579869162384, 2.22596680234449, 5.70995502095345,
4.26850758713846, 120, 25.6383266263976, 2.90543208425715, 8.40935809851042,
2.31807635931822, 8.49055234623605, 3.29831448162297, 3.65068984963035,
1.93567603146573, 2.49808722814557, 3.14095440681389, 2.08508075133288,
3.08360524948663, 1.74613534854807, 1.91624362373354, 3.797786602908,
3.06755845905157, 3.11530841942899, 2.06455239407449, 1.71396244231883,
5.7985222607316, 3.74822367820585), group = c("case", "control",
"case", "control", "case", "control", "case", "control", "case",
"control", "case", "case", "control", "control", "case", "control",
"case", "case", "case", "case", "case", "case", "control", "control",
"control", "control", "control", "control", "case", "control",
"case", "case", "control", "control", "case", "case", "case",
"case", "case", "control", "control", "control", "control", "control",
"case", "case", "case", "case", "case", "control", "control",
"control", "control", "control", "case", "case", "case", "case",
"control", "control", "control", "control", "case", "control",
"case", "control", "case", "control", "case", "control", "case",
"control", "case", "control"), emm_type = structure(c(1L, 1L,
2L, 3L, 4L, 5L, 6L, 6L, 7L, 8L, 9L, 11L, 9L, 11L, 12L, 12L, 13L,
15L, 17L, 19L, 21L, 23L, 13L, 15L, 17L, 19L, 21L, 23L, 24L, 24L,
25L, 27L, 26L, 28L, 29L, 31L, 33L, 35L, 37L, 29L, 31L, 33L, 35L,
37L, 38L, 40L, 42L, 44L, 46L, 38L, 40L, 42L, 44L, 46L, 47L, 49L,
51L, 53L, 47L, 49L, 51L, 53L, 54L, 54L, 55L, 55L, 56L, 56L, 57L,
57L, 58L, 58L, 59L, 59L), .Label = c("197", "1", "238.1", "12",
"39.4", "3.1", "36.2", "54.1", "71", "100", "104", "123", "33",
"41.2", "52", "53", "86", "91", "93.4", "101", "108.1", "116.1",
"225", "4", "68", "76", "90.5", "92", "25", "44", "49", "58",
"82", "87", "103", "113", "118", "2", "8", "22", "28", "77",
"88", "89", "114", "232.1", "11", "42", "59.1", "65", "75", "81",
"85", "19.4", "218.1", "233", "6", "74", "95"), class = "factor", scores = structure(c(`1` = 2,
`2` = 12, `3.1` = 4, `4` = 9, `6` = 17, `8` = 12, `11` = 13,
`12` = 3, `19.4` = 14, `22` = 12, `25` = 11, `28` = 12, `33` = 8,
`36.2` = 5, `39.4` = 3, `41.2` = 8, `42` = 13, `44` = 11, `49` = 11,
`52` = 8, `53` = 8, `54.1` = 5, `58` = 11, `59.1` = 13, `65` = 13,
`68` = 10, `71` = 6, `74` = 18, `75` = 13, `76` = 10, `77` = 12,
`81` = 13, `82` = 11, `85` = 13, `86` = 8, `87` = 11, `88` = 12,
`89` = 12, `90.5` = 10, `91` = 8, `92` = 10, `93.4` = 8, `95` = 19,
`100` = 6, `101` = 8, `103` = 11, `104` = 6, `108.1` = 8, `113` = 11,
`114` = 12, `116.1` = 8, `118` = 11, `123` = 7, `197` = 1, `218.1` = 15,
`225` = 8, `232.1` = 12, `233` = 16, `238.1` = 2), .Dim = 59L, .Dimnames = list(
c("1", "2", "3.1", "4", "6", "8", "11", "12", "19.4", "22",
"25", "28", "33", "36.2", "39.4", "41.2", "42", "44", "49",
"52", "53", "54.1", "58", "59.1", "65", "68", "71", "74",
"75", "76", "77", "81", "82", "85", "86", "87", "88", "89",
"90.5", "91", "92", "93.4", "95", "100", "101", "103", "104",
"108.1", "113", "114", "116.1", "118", "123", "197", "218.1",
"225", "232.1", "233", "238.1"))))), row.names = c(NA, -74L
), class = c("tbl_df", "tbl", "data.frame"))
我已经用以下代码绘制了案例和控件的热图:
(cases_heatmap <- ggplot(filter(example, group == "case"), aes(id, factor(emm_type)))+geom_tile(aes(fill=value_mean), colour="white")+
scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 60,limits=c(0,max(example$value_mean)))+
scale_y_discrete(expand = c(0, 0)) +
theme(axis.ticks=element_blank(),
axis.text.x=element_text(angle = 90, vjust = 0.6),legend.position = "none")+
coord_equal())
(cases_heatmap <- ggplot(filter(example, group == "control"), aes(id, factor(emm_type)))+geom_tile(aes(fill=value_mean), colour="white")+
scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 60,limits=c(0,max(example$value_mean)))+
scale_y_discrete(expand = c(0, 0)) +
theme(axis.ticks=element_blank(),
axis.text.x=element_text(angle = 90, vjust = 0.6),legend.position = "none")+
coord_equal())
这给了我类似的东西(一个用于案例,一个用于控件:
为了沿着侧面绘制聚类和图案,我对数据进行了一些调整,以获得可以绘制的列(使用“ cluster_text”和“ pattern_text”列),并有一个要按(num_cluster ):
example <- example%>%
mutate(num_cluster = as.numeric(factor(example$cluster))) %>%
mutate(num_pattern = as.numeric(factor(example$pattern))) %>%
mutate(cluster_text = "Cluster") %>%
mutate(pattern_text = "Pattern")
[1]: https://i.stack.imgur.com/CO1eP.jpg
因为我希望将群集分组在一起,所以我对级别进行了重新排序:
example$emm_type <- reorder(example$emm_type, example$cluster)
然后,为了获得带有颜色的注释栏(“簇”和“模式”),我想沿着热图绘制颜色,然后绘制新创建的“ cluster_text”和“ pattern_text”列的另一个geom_tile:
cluster_annotation <- ggplot(filter(example, group == "case"), aes(cluster_text, factor(emm_type)))+geom_tile(aes(fill=cluster), colour="white")+
coord_equal()+
theme(axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank())
pattern_annotation <- ggplot(filter(example, group == "case"), aes(pattern_text, factor(emm_type)))+geom_tile(aes(fill=pattern), colour="white")+
coord_equal()+
theme(axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank())
这给了我所需的注释图块(对于集群,这是我的,对于模式,我得到了相同的东西):
现在,我希望所有图块彼此相邻,甚至希望绘制在同一geom_tile上,以使emm_types与其各自的图案和簇对齐,但是我一生都无法弄清楚该如何做。
这是我的最终图形的图片,当我使用更多数据时,这些图形彼此相邻对齐:
> sessionInfo()
R version 3.5.0 (2018-04-23)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.6
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
locale:
[1] en_NZ.UTF-8/en_NZ.UTF-8/en_NZ.UTF-8/C/en_NZ.UTF-8/en_NZ.UTF-8
attached base packages:
[1] grid stats graphics grDevices utils datasets methods base
other attached packages:
[1] bindrcpp_0.2.2 cowplot_0.9.3 scales_0.5.0 forcats_0.3.0 stringr_1.3.1 dplyr_0.7.6 purrr_0.2.5 readr_1.1.1 tidyr_0.8.1 tibble_1.4.2
[11] ggplot2_3.0.0 tidyverse_1.2.1 readxl_1.1.0
loaded via a namespace (and not attached):
[1] Rcpp_0.12.18 cellranger_1.1.0 pillar_1.3.0 compiler_3.5.0 plyr_1.8.4 bindr_0.1.1 tools_3.5.0 digest_0.6.15 lubridate_1.7.4
[10] jsonlite_1.5 nlme_3.1-137 gtable_0.2.0 lattice_0.20-35 pkgconfig_2.0.1 rlang_0.2.1 cli_1.0.0 rstudioapi_0.7 yaml_2.2.0
[19] haven_1.1.2 withr_2.1.2 xml2_1.2.0 httr_1.3.1 hms_0.4.2 tidyselect_0.2.4 glue_1.3.0 R6_2.2.2 fansi_0.2.3
[28] reshape2_1.4.3 modelr_0.1.2 magrittr_1.5 backports_1.1.2 rvest_0.3.2 assertthat_0.2.0 colorspace_1.3-2 labeling_0.3 utf8_1.1.4
[37] stringi_1.2.4 lazyeval_0.2.1 munsell_0.5.0 broom_0.5.0 crayon_1.3.4
答案 0 :(得分:3)
要在同一geom_tile()
上绘制“群集”列和“模式”列,我们需要先将数据从宽变长到长整形。
library(tidyr)
example %>%
gather(annotation, value, cluster, pattern) %>%
ggplot(., aes(annotation, factor(emm_type)))+geom_tile(aes(fill=value), colour="white")+
coord_equal()+
theme(axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.6)) -> p_annotation
合并情节。
library(patchwork)
cases_heatmap + controls_heatmap + p_annotation
第一次尝试
有一些软件包*可以组合ggplot对象,其中一个是patchwork
。
# install.packages("devtools")
# devtools::install_github("thomasp85/patchwork")
library(patchwork)
cases_heatmap + controls_heatmap + cluster_annotation + pattern_annotation +
plot_layout(nrow = 1)
*其他软件包为egg
,cowplot
,multipanelfigure
,...。