我正在为当前的分析绘制热图,用来展示各组织类型中生物功能类别的分布。我已经用 `geom_tile` 成功生成了热图,但希望热图中出现的空白区域也能保留网格线。
之所以出现这些空白,是因为对应的组合在数据中根本没有记录(不是 NA 或零,而是该行完全不存在)。是否可以:1)修改图形,使空白区域也显示网格;或者 2)修改数据框,在当前缺失这些数据的位置填入 NA 或零?
这是我的数据:
structure(list(Tissue = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("FB",
"SOG", "MG", "HG", "MT"), class = "factor"), Transcript_Count = c(64,
36, 35, 42, 66, 122, 62, 40, 34, 46, 40, 36, 41, 37, 36, 37,
40, 35, 38, 40, 53, 37, 36, 36, 68, 40, 40, 116, 84, 149, 45,
72, 42, 65, 78, 37, 62, 35, 35, 43, 38, 152, 37, 60, 36, 66,
40, 60, 45, 35, 36, 35, 129, 193, 153, 420, 247, 357, 237, 343,
199, 484, 112, 464, 244, 150, 127, 151, 247, 152, 238, 246, 127,
127, 120, 182, 245, 128, 388, 279, 246, 139, 120, 120, 120, 146,
119, 143, 144, 133, 126, 133, 143, 143, 218, 131, 121, 120, 119,
124, 127, 119, 124, 124, 119, 224, 306, 387, 102, 108, 122, 136,
186, 373, 85, 151, 156, 83, 161, 127, 286, 135, 82, 180, 150,
158, 157, 76, 142, 95, 79, 81, 78, 79, 77, 183, 88, 99, 189,
356, 162, 150, 125, 110, 96, 98, 88, 91, 100, 93, 101, 150, 90,
88, 193, 96, 100, 336, 275, 410, 108, 225, 103, 187, 237, 90,
163, 131, 100, 92, 427, 90, 171, 88, 190, 102, 175, 109, 107,
80, 97, 87, 72, 256, 185, 144, 266, 233, 150, 83, 106, 133, 133,
133, 69, 217, 70, 134, 131, 101, 121, 58, 67, 65, 61, 58, 64,
64, 64, 65, 58, 57), GO.ID = structure(c(1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 19L, 9L, 10L, 6L, 37L,
35L, 8L, 29L, 39L, 42L, 53L, 30L, 34L, 31L, 22L, 49L, 25L, 21L,
1L, 46L, 43L, 36L, 12L, 48L, 5L, 41L, 28L, 32L, 7L, 40L, 23L,
15L, 18L, 33L, 38L, 20L, 47L, 26L, 54L, 11L, 27L, 17L, 44L, 13L,
14L, 51L, 3L, 24L, 16L, 52L, 2L, 45L, 50L, 29L, 6L, 42L, 9L,
39L, 8L, 37L, 35L, 30L, 10L, 1L, 34L, 49L, 25L, 21L, 28L, 7L,
31L, 32L, 48L, 46L, 5L, 27L, 44L, 4L, 47L, 40L, 17L, 33L, 20L,
1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L, 14L, 16L, 17L,
19L, 20L, 21L, 22L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L,
33L, 34L, 35L, 36L, 37L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L,
47L, 48L, 49L, 37L, 9L, 8L, 39L, 10L, 30L, 29L, 35L, 42L, 6L,
32L, 21L, 7L, 5L, 25L, 34L, 31L, 28L, 49L, 46L, 1L, 48L, 44L,
11L, 40L, 47L, 55L, 26L, 27L, 17L, 20L, 33L, 13L, 16L), .Label = c("GO:0006139",
"GO:0006351", "GO:0006355", "GO:0006508", "GO:0006725", "GO:0006807",
"GO:0006810", "GO:0007154", "GO:0007165", "GO:0009058", "GO:0009059",
"GO:0009889", "GO:0010467", "GO:0010468", "GO:0010556", "GO:0016070",
"GO:0018130", "GO:0019219", "GO:0019222", "GO:0019438", "GO:0019538",
"GO:0031323", "GO:0031326", "GO:0032774", "GO:0034641", "GO:0034645",
"GO:0034654", "GO:0043170", "GO:0044237", "GO:0044238", "GO:0044249",
"GO:0044260", "GO:0044271", "GO:0046483", "GO:0050794", "GO:0051171",
"GO:0051234", "GO:0051252", "GO:0051716", "GO:0055085", "GO:0060255",
"GO:0071704", "GO:0080090", "GO:0090304", "GO:0097659", "GO:1901360",
"GO:1901362", "GO:1901564", "GO:1901576", "GO:1903506", "GO:2000112",
"GO:2001141", "GO:0003008", "GO:0006811", "GO:0006259"), class = "factor")), row.names = c(NA,
-212L), class = "data.frame")
生成热图的代码:
# Heat map of Transcript_Count with Tissue on the x axis and GO.ID on the
# y axis; each tile is outlined in black.
# Alex_Theme is defined outside this snippet (presumably a ggplot2 theme
# object); the theme() call is added after it so the text sizes set here are
# applied on top of it.
# NOTE(review): the midpoint is computed from All_Tissues_BP while the plotted
# data is All_Tissues_BP_Head -- confirm that this is intended.
(
  ggplot(data = All_Tissues_BP_Head, mapping = aes(x = Tissue, y = GO.ID)) +
    Alex_Theme +
    geom_tile(mapping = aes(fill = Transcript_Count), colour = "black") +
    scale_fill_gradient2(
      low = "white",
      mid = "blue",
      high = "black",
      midpoint = mean(All_Tissues_BP$Transcript_Count)
    ) +
    # Remove the padding on both sides of the discrete x axis.
    scale_x_discrete(expand = c(0, 0)) +
    labs(
      title = expression(atop(bold("Biological Processes"))),
      fill = "Transcript \n count"
    ) +
    theme(
      legend.title = element_text(size = 12),
      legend.text = element_text(size = 12),
      axis.text = element_text(size = 12),
      axis.title.y = element_blank(),
      axis.title.x = element_text(size = 12)
    )
)
答案 0 :(得分:0)
使用 tidyr 的 `complete()` 函数,用 `NA` 填充 data.frame 中缺失的因子组合。
然后,您可以在颜色渐变标度中使用 `na.value` 参数来设置这些缺失单元格的颜色。
library(ggplot2)
library(dplyr)
library(tidyr)
# Add a row for every Tissue x GO.ID combination that is absent from the data.
# complete() fills Transcript_Count with NA for those rows, so geom_tile()
# draws an outlined cell there instead of leaving a gap in the grid.
all_tissues_complete <- complete(All_Tissues_BP_Head, Tissue, GO.ID)

ggplot(all_tissues_complete, aes(Tissue, GO.ID)) +
  geom_tile(aes(fill = Transcript_Count), color = "black") +
  scale_fill_gradient2(
    low = "white", mid = "blue", high = "black",
    # na.rm guards the midpoint against NA counts in the source data.
    midpoint = mean(All_Tissues_BP_Head$Transcript_Count, na.rm = TRUE),
    # Use a neutral grey for missing combinations: "black" is already the
    # colour of the highest counts and "white" that of the lowest, so either
    # would make absent data look like real values.
    na.value = "grey80"
  ) +
  # Remove the padding on both sides of the discrete x axis.
  scale_x_discrete(expand = c(0, 0)) +
  ggtitle(expression(atop(bold("Biological Processes")))) +
  theme(
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 12)
  ) +
  theme(
    axis.text = element_text(size = 12),
    axis.title.y = element_blank(),
    axis.title.x = element_text(size = 12)
  ) +
  labs(fill = "Transcript \n count")