在p值的ggplot geom_tile中格式化颜色和图例

时间:2014-02-11 22:17:30

标签: r colors ggplot2 legend p-value

我目前正在尝试使用 ggplot2 制作一张“热图”来显示一系列 p 值,但不知道如何自定义实际的颜色映射和图例。

# Reshape to long format, then rebuild var2 and var1 as ordered factors whose
# levels follow the order of first appearance, so the plot axes keep the
# data's original ordering instead of being sorted alphabetically.
sampledata.m <- melt(sampledata)
sampledata.m[c("var2", "var1")] <- lapply(
  sampledata.m[c("var2", "var1")],
  function(axis_values) {
    axis_labels <- as.character(axis_values)
    factor(axis_labels, levels = unique(axis_labels), ordered = TRUE)
  }
)

这样做是为了保持变量的顺序。

# Heat map of p-values: one tile per (var2, var1) pair, filled by `value`.
# `values` gives the rescaled positions in [0, 1] at which the gradient
# colours are anchored; `limits` pins the fill scale to the 0-1 range.
# NOTE(review): `values` has five entries but `colours` only three; ggplot2
# documents `values` as one position per colour, so confirm the intended
# colour/position pairing.
# Fix: the scale_fill_gradientn() call previously ended with one closing
# parenthesis too many, which made the statement a parse error.
p <- ggplot(sampledata.m, aes(var2, var1)) +
  geom_tile(aes(fill = value), colour = "transparent") +
  scale_fill_gradientn(colours = c("light green", "dark green", "black"),
    values = rescale(c(0, 0.0003, 0.05, 0.5, 1)), limits = c(0, 1))
# Presentation layer: black-and-white theme, empty axis titles, legend at the
# bottom, no axis ticks, rotated x-axis labels.
# NOTE(review): `base_size` is not defined in this snippet; it must already
# exist in the calling environment -- confirm.
p + theme_bw(base_size = base_size) + labs(x = "", y = "") +
  scale_x_discrete(expand = c(0, 0)) +
  theme(legend.position = "bottom", axis.ticks = element_blank(),
    axis.text.x = element_text(size = base_size * 0.8, angle = 310,
    hjust = 0, colour = "black"))

这会生成一张漂亮的图,但我的图例和颜色渐变并没有体现我指定的重新缩放(rescale)。如果这个问题的解决办法很简单,请原谅我的无知,我使用 R 编程才大约两周。理想情况下,我希望我的图和图例能够模仿这篇文章中类似的配色方案和图例标签: http://www.ncbi.nlm.nih.gov/pubmed/22496159

structure(list(var1 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L
), .Label = c("A", "B", "C", 
"D", "E"), class = "factor"), var2 = structure(c(1L, 
5L, 23L, 18L, 9L, 8L, 14L, 12L, 20L, 6L, 21L, 11L, 2L, 22L, 10L, 
3L, 19L, 16L, 4L, 7L, 15L, 17L, 13L, 24L, 1L, 5L, 23L, 18L, 9L, 
8L, 14L, 12L, 20L, 6L, 21L, 11L, 2L, 22L, 10L, 3L, 19L, 16L, 
4L, 7L, 15L, 17L, 13L, 24L, 1L, 5L, 23L, 18L, 9L, 8L, 14L, 12L, 
20L, 6L, 21L, 11L, 2L, 22L, 10L, 3L, 19L, 16L, 4L, 7L, 15L, 17L, 
13L, 24L, 1L, 5L, 23L, 18L, 9L, 8L, 14L, 12L, 20L, 6L, 21L, 11L, 
2L, 22L, 10L, 3L, 19L, 16L, 4L, 7L, 15L, 17L, 13L, 24L, 1L, 5L, 
23L, 18L, 9L, 8L, 14L, 12L, 20L, 6L, 21L, 11L, 2L, 22L, 10L, 
3L, 19L, 16L, 4L, 7L, 15L, 17L, 13L), .Label = c("1", "2", 
"3", "4", "5", "6", "7", "8", 
"9", "10", "11", "12", "13", "14", "15", 
"16", "17", "18", "19", "20", "21", 
"22", "23", "24"), class = "factor"), variable = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L), .Label = "pvalue", class = "factor"), 
value = c(0.810172671, 0.596026338, 0.076550169, 0.908670635, 
0.300418653, 0.051553286, 0.124196482, 0.601568833, 0.058431468, 
0.341726981, 0.876674726, 0.002698295, 0.812059425, 0.068199656, 
0.758383287, 0.60362134, 0.89265723, 0.246111936, 0.156348035, 
0.909574522, 0.020202377, 0.388843992, 0.769441835, 0.102272916, 
0.38895717, 0.882296525, 0.792438683, 0.000491393, 0.004233434, 
0.202424095, 0.426941568, 0.08520186, 0.763036306, 0.602828564, 
0.037278697, 0.121642743, 0.669123606, 0.974328438, 0.834329923, 
0.050413697, 0.078476666, 0.387647156, 0.000540422, 0.379576632, 
0.361428444, 0.502439758, 0.001326035, 0.027652693, 0.188885638, 
0.579244445, 0.471985778, 0.677458228, 0.119307242, 0.364857868, 
0.238260538, 0.53472206, 0.204344281, 0.291888993, 0.295809688, 
0.00029, 0.005476157, 0.960975822, 0.00029, 0.055915429, 
0.618284682, 0.040605253, 0.521649682, 0.421086546, 0.164333061, 
0.755528982, 0.306854182, 0.012832628, 0.270393143, 0.946675764, 
0.59227376, 0.112658388, 0.429091426, 0.01662083, 0.017342483, 
0.065817234, 0.012140224, 0.359828816, 0.031969725, 0.00029, 
0.14555102, 0.18865081, 0.00029, 0.064107531, 0.505257768, 
0.070224536, 0.017082975, 0.375864198, 0.00029, 0.104103689, 
0.898979883, 0.004879605, 0.003597954, 0.036722932, 0.849058218, 
0.00029, 0.003739938, 0.00029, 0.00029, 0.00029, 0.008179017, 
0.193870353, 0.460181712, 0.389475522, 0.00029, 0.8785017, 
0.070414642, 0.584977921, 0.990764677, 0.767253318, 0.002234906, 
0.051331823, 0.00446149, 0.234477639, 0.275139791)), .Names = c("var1", "var2", "variable", "value"), row.names = c(NA, -119L), class = "data.frame")

2 个答案:

答案 0 :(得分:4)

我不打算逐一讨论你的所有主题设置——据我理解,问题的关键在于填充渐变的标度。你可以在 scale_fill_gradient() 中使用对数变换(log transform)来设置:

# Same tile layer as in the question, but with a two-colour fill gradient
# (light green to black) computed on a log-transformed scale.
# NOTE(review): log(0) is -Inf, so the 0 entry in `breaks` has no finite
# position on this scale -- confirm whether it is expected in the legend.
p <- ggplot(data = sampledata.m, mapping = aes(x = var2, y = var1)) +
  geom_tile(mapping = aes(fill = value), colour = "transparent") +
  scale_fill_gradient(
    low = "light green",
    high = "black",
    trans = "log",
    breaks = c(0, 0.001, 0.05, 0.5)
  )

enter image description here

答案 1 :(得分:0)

midpoint=10

enter image description here

但如果我在 scale_fill_gradient2 中添加 midpoint=10,图片将会变为:

enter image description here