热图中缺少值

时间:2019-05-03 16:19:26

标签: r ggplot2 graphics heatmap

我正在为手头的一项分析绘制热图,用来展示各生物功能类别在不同组织类型中的分布。我已经使用geom_tile成功生成了热图,但希望热图中出现的空白区域也能保留网格线。

enter image description here

之所以出现这些空白,是因为对应的组织与GO条目组合在数据中没有记录(不是NA或零,而是该行完全不存在)。是否可以:1)修改图形,使空白区域也绘制网格;或者2)修改数据框,在当前缺失这些组合的位置补上NA或零?

这是我的数据:

structure(list(Tissue = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("FB", 
"SOG", "MG", "HG", "MT"), class = "factor"), Transcript_Count = c(64, 
36, 35, 42, 66, 122, 62, 40, 34, 46, 40, 36, 41, 37, 36, 37, 
40, 35, 38, 40, 53, 37, 36, 36, 68, 40, 40, 116, 84, 149, 45, 
72, 42, 65, 78, 37, 62, 35, 35, 43, 38, 152, 37, 60, 36, 66, 
40, 60, 45, 35, 36, 35, 129, 193, 153, 420, 247, 357, 237, 343, 
199, 484, 112, 464, 244, 150, 127, 151, 247, 152, 238, 246, 127, 
127, 120, 182, 245, 128, 388, 279, 246, 139, 120, 120, 120, 146, 
119, 143, 144, 133, 126, 133, 143, 143, 218, 131, 121, 120, 119, 
124, 127, 119, 124, 124, 119, 224, 306, 387, 102, 108, 122, 136, 
186, 373, 85, 151, 156, 83, 161, 127, 286, 135, 82, 180, 150, 
158, 157, 76, 142, 95, 79, 81, 78, 79, 77, 183, 88, 99, 189, 
356, 162, 150, 125, 110, 96, 98, 88, 91, 100, 93, 101, 150, 90, 
88, 193, 96, 100, 336, 275, 410, 108, 225, 103, 187, 237, 90, 
163, 131, 100, 92, 427, 90, 171, 88, 190, 102, 175, 109, 107, 
80, 97, 87, 72, 256, 185, 144, 266, 233, 150, 83, 106, 133, 133, 
133, 69, 217, 70, 134, 131, 101, 121, 58, 67, 65, 61, 58, 64, 
64, 64, 65, 58, 57), GO.ID = structure(c(1L, 2L, 3L, 4L, 5L, 
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 
45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 19L, 9L, 10L, 6L, 37L, 
35L, 8L, 29L, 39L, 42L, 53L, 30L, 34L, 31L, 22L, 49L, 25L, 21L, 
1L, 46L, 43L, 36L, 12L, 48L, 5L, 41L, 28L, 32L, 7L, 40L, 23L, 
15L, 18L, 33L, 38L, 20L, 47L, 26L, 54L, 11L, 27L, 17L, 44L, 13L, 
14L, 51L, 3L, 24L, 16L, 52L, 2L, 45L, 50L, 29L, 6L, 42L, 9L, 
39L, 8L, 37L, 35L, 30L, 10L, 1L, 34L, 49L, 25L, 21L, 28L, 7L, 
31L, 32L, 48L, 46L, 5L, 27L, 44L, 4L, 47L, 40L, 17L, 33L, 20L, 
1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L, 14L, 16L, 17L, 
19L, 20L, 21L, 22L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 
33L, 34L, 35L, 36L, 37L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 
47L, 48L, 49L, 37L, 9L, 8L, 39L, 10L, 30L, 29L, 35L, 42L, 6L, 
32L, 21L, 7L, 5L, 25L, 34L, 31L, 28L, 49L, 46L, 1L, 48L, 44L, 
11L, 40L, 47L, 55L, 26L, 27L, 17L, 20L, 33L, 13L, 16L), .Label = c("GO:0006139", 
"GO:0006351", "GO:0006355", "GO:0006508", "GO:0006725", "GO:0006807", 
"GO:0006810", "GO:0007154", "GO:0007165", "GO:0009058", "GO:0009059", 
"GO:0009889", "GO:0010467", "GO:0010468", "GO:0010556", "GO:0016070", 
"GO:0018130", "GO:0019219", "GO:0019222", "GO:0019438", "GO:0019538", 
"GO:0031323", "GO:0031326", "GO:0032774", "GO:0034641", "GO:0034645", 
"GO:0034654", "GO:0043170", "GO:0044237", "GO:0044238", "GO:0044249", 
"GO:0044260", "GO:0044271", "GO:0046483", "GO:0050794", "GO:0051171", 
"GO:0051234", "GO:0051252", "GO:0051716", "GO:0055085", "GO:0060255", 
"GO:0071704", "GO:0080090", "GO:0090304", "GO:0097659", "GO:1901360", 
"GO:1901362", "GO:1901564", "GO:1901576", "GO:1903506", "GO:2000112", 
"GO:2001141", "GO:0003008", "GO:0006811", "GO:0006259"), class = "factor")), row.names = c(NA, 
-212L), class = "data.frame")

生成热图的代码:

# Heatmap of Transcript_Count for every GO.ID (rows) within each Tissue
# (columns). Tissue/GO.ID pairs that have no row in the data are not drawn at
# all by geom_tile(), which is what leaves the blank regions in the figure.
(ggplot(All_Tissues_BP_Head, aes(Tissue, GO.ID)) +
    Alex_Theme +  # custom theme object; its definition is not part of this snippet
    geom_tile(aes(fill = Transcript_Count), color = "black") +
    # Centre the colour scale on the mean of the data frame that is actually
    # plotted, so the midpoint always matches the tiles shown.
    scale_fill_gradient2(low = "white", mid = "blue", high = "black",
                         midpoint = mean(All_Tissues_BP_Head$Transcript_Count)) +
    # Remove the default padding on the x axis so tiles touch the panel edge.
    scale_x_discrete(expand = c(0, 0)) +
    ggtitle(expression(atop(bold("Biological Processes")))) +
    theme(legend.title = element_text(size = 12),
          legend.text = element_text(size = 12),
          axis.text = element_text(size = 12),
          axis.title.y = element_blank(),
          axis.title.x = element_text(size = 12)) +
    labs(fill = "Transcript \n count"))

1 个答案:

答案 0 :(得分:0)

使用tidyr的complete函数用NA填充data.frame中的缺失因子组合。

然后,您可以在颜色渐变中使用na.value参数来设置颜色。

library(ggplot2)
library(dplyr)
library(tidyr)

# Add a row for every Tissue x GO.ID combination that is absent from the data.
# complete() fills Transcript_Count with NA in those new rows, so geom_tile()
# now draws a bordered cell for them instead of leaving a gap.
All_Tissues_BP_Complete <- complete(All_Tissues_BP_Head, Tissue, GO.ID)

ggplot(All_Tissues_BP_Complete, aes(Tissue, GO.ID)) +
    geom_tile(aes(fill = Transcript_Count), color = "black") +
    # na.value must differ from the `high` colour ("black"); otherwise the
    # filled-in missing cells look identical to the highest counts.
    scale_fill_gradient2(low = "white", mid = "blue", high = "black",
                         midpoint = mean(All_Tissues_BP_Head$Transcript_Count),
                         na.value = "grey90") +
    # Remove the default padding on the x axis so tiles touch the panel edge.
    scale_x_discrete(expand = c(0, 0)) +
    ggtitle(expression(atop(bold("Biological Processes")))) +
    theme(legend.title = element_text(size = 12),
          legend.text = element_text(size = 12),
          axis.text = element_text(size = 12),
          axis.title.y = element_blank(),
          axis.title.x = element_text(size = 12)) +
    labs(fill = "Transcript \n count")