用ggplot2为facet_grid分面的核密度图尾部区域着色

时间:2019-01-24 18:48:23

标签: r ggplot2

请考虑以下df

# Example data: a data.frame with two columns.
#   Trial - factor with five levels (ES8-13, ES14-25, ES26-38, SA1-12, SA14-25)
#   MAF   - numeric values that are plotted and summarised further down
# NOTE(review): the integer row names look like row indices kept from a larger
# table that was subsampled - confirm against the original data source.
df<-structure(list(Trial = structure(c(1L, 5L, 1L, 5L, 1L, 4L, 3L, 
2L, 2L, 4L, 3L, 3L, 2L, 5L, 4L, 1L, 2L, 3L, 5L, 1L, 2L, 1L, 4L, 
3L, 1L, 3L, 3L, 2L, 3L, 5L, 1L, 3L, 3L, 5L, 5L, 1L, 4L, 3L, 3L, 
1L, 1L, 5L, 5L, 1L, 3L, 5L, 2L, 1L, 5L, 3L, 2L, 1L, 4L, 3L, 5L, 
3L, 4L, 1L, 2L, 2L, 2L, 2L, 4L, 1L, 4L, 5L, 3L, 1L, 5L, 3L, 3L, 
4L, 2L, 2L, 4L, 4L, 1L, 3L, 4L, 5L, 4L, 2L, 3L, 1L, 1L, 4L, 2L, 
3L, 5L, 2L, 2L, 4L, 1L, 4L, 4L, 5L, 2L, 4L, 2L, 4L, 1L, 4L, 3L, 
5L, 4L, 5L, 2L, 3L, 2L, 2L, 5L, 1L, 3L, 3L, 3L, 1L, 2L, 4L, 5L, 
3L, 1L, 2L, 5L, 1L, 4L, 3L, 2L, 2L, 5L, 1L, 5L, 1L, 4L, 5L, 5L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 3L, 2L, 5L, 2L, 5L, 2L, 4L, 1L), .Label = 
c("ES8-13", "ES14-25", "ES26-38", "SA1-12", "SA14-25"), class = "factor"), 
MAF = c(-0.022, 0.141, -0.035, 0.076, -0.019, -0.064, -0.044, 
0.088, 0.067, 0.049, 0.088, 0.053, -0.052, -0.078, 0.104, 
-0.019, -0.075, -0.049, 0.098, -0.145, 0.094, 0.014, 0.016, 
0.00599999999999999, 0.027, 0.117, -0.011, 0.055, 0.079, 
0.26, -0.049, 0.065, 0.227, 0.141, -0.091, -0.021, -0.306, 
0.162, -0.089, -0.068, 0.00700000000000001, 0.034, 0.02, 
0.038, 0.129, 0.099, 0.06, -0.09, 0.104, 0.195, 0.165, -0.047, 
0.074, -0.01, 0.002, -0.068, 0.054, 0.012, -0.012, 0.222, 
0.046, 0.00700000000000001, -0.022, 0.00499999999999995, 
-0.051, 0.126, 0.073, 0.094, -0.254, 0.185, 0.238, 0.099, 
0.027, 0.044, -0.018, 0.014, -0.058, -0.005, -0.00999999999999998, 
-0.002, 0.061, 0.178, 0.001, 0.105, -0.001, -0.088, 0.113, 
0.134, 0.175, 0.06, -0.026, 0.048, 0.003, 0.049, 0.0649999999999999, 
-0.135, -0.036, -0.069, 0.015, -0.058, 0.024, 0.093, 0.123, 
-0.144, 0.011, 0.343, 0.002, -0.018, 0.055, -0.047, -0.317, 
-0.033, -0.018, 0.068, -0.044, 0.05, 0.079, 0.122, -0.071, 
0.13, 0.078, 0.085, 0.012, -0.02, -0.088, -0.086, -0.026, 
0.046, 0.101, -0.026, 0.005, 0.00700000000000001, 0.064, 
0.066, -0.085, 0.114, 0.003, 0.004, -0.003, 0.097, 0.055, 
-0.063, -0.089, 0.104, -0.199, 0.01, 0.184, 0.183, 0.129, 
-0.059)), row.names = c(1146L, 163986L, 34946L, 168682L, 
33356L, 152862L, 103827L, 54557L, 68666L, 141066L, 118349L, 93909L, 
67299L, 193633L, 129212L, 39273L, 71459L, 102636L, 176655L, 30543L, 
46107L, 32608L, 122906L, 100356L, 37635L, 81566L, 116510L, 61803L, 
96219L, 187927L, 9211L, 106999L, 88554L, 181316L, 176250L, 32656L, 
150472L, 80615L, 111414L, 16038L, 23319L, 185075L, 175803L, 32648L, 
106332L, 185991L, 65155L, 32165L, 189972L, 92486L, 44161L, 404L, 
123856L, 80513L, 180030L, 101190L, 145315L, 5498L, 75891L, 77358L, 
67571L, 72894L, 127763L, 6584L, 139250L, 163126L, 101492L, 22520L, 
181276L, 82673L, 94756L, 142750L, 48377L, 59931L, 140900L, 154339L, 
2769L, 110265L, 130494L, 186334L, 138079L, 50754L, 82207L, 24578L, 
26393L, 128021L, 69283L, 84549L, 187875L, 76775L, 45715L, 138049L, 
1972L, 137218L, 158324L, 200014L, 61611L, 147430L, 60938L, 154928L, 
22421L, 159532L, 98190L, 166565L, 151667L, 180407L, 55681L, 89127L, 
54396L, 65975L, 172695L, 21969L, 80439L, 81202L, 87282L, 35394L, 
53137L, 131886L, 163181L, 84221L, 32007L, 57711L, 160393L, 32843L, 
157924L, 104820L, 63993L, 55023L, 160342L, 20800L, 167583L, 15849L, 
143476L, 172878L, 195659L, 49812L, 4971L, 44583L, 24399L, 77026L, 
16862L, 56500L, 113282L, 65688L, 188635L, 75437L, 190601L, 54633L, 
137420L, 27389L), class = "data.frame")

以下是df的前几行:

Trial    MAF
ES8-13 -0.022
SA14-25  0.141
ES8-13 -0.035
SA14-25  0.076
ES8-13 -0.019
SA1-12 -0.064

我绘制了以下内核密度图:

 # Kernel density of MAF, drawn as one grey-filled panel per Trial level
 # (panels stacked in rows).
 p <- ggplot(data = df, mapping = aes(x = MAF)) +
   geom_density(fill = "grey") +
   facet_grid(rows = vars(Trial))
 # Auto-print the plot when run at top level.
 p

enter image description here

我想给中间90%以外的两个尾部区域加阴影(即低于5%分位数和高于95%分位数的部分)。例如,使用以下命令,我可以得到整个df的分位数:

# Overall 5% and 95% quantiles of MAF, pooled across all Trial levels.
qt <- quantile(df[["MAF"]], probs = c(0.05, 0.95))

但是我宁愿需要Trial因子每个级别的分位数,如下所示:

# Attach dplyr with library() so a missing package fails immediately;
# require() would only return FALSE and let the pipeline fail later.
library(dplyr)

# 5% and 95% quantiles of MAF for each level of Trial, computed on the
# example data frame `df` (the name the rest of this example uses).
# names = FALSE drops the "5%"/"95%" element names so each summary column
# is a plain numeric vector; the columns get explicit names instead of the
# deparsed call text.
qt05 <- df %>%
  group_by(Trial) %>%
  summarise(MAF_q05 = quantile(MAF, probs = 0.05, names = FALSE))
qt95 <- df %>%
  group_by(Trial) %>%
  summarise(MAF_q95 = quantile(MAF, probs = 0.95, names = FALSE))

有了这些分位数之后,我需要在图的每个分面(即因子Trial的每个水平)中分别为尾部区域着色。我找到了解决此问题的方法,但它仅适用于单个图(不分面)的情况。

有人可以帮我解决使用facet_grid分面的情况吗?

1 个答案:

答案 0 :(得分:3)

我使用library(ggridges)来绘制分布图,因为它具有很多不错的功能,包括自定义分位数阴影的功能!

这里是一个没有构面的示例,因为使用此策略,您可能不再需要构面:

# ggridges only imports ggplot2, so attach ggplot2 explicitly for ggplot(),
# aes() and scale_fill_manual().
library(ggplot2)
library(ggridges)

# Ridgeline densities of MAF, one ridge per Trial, with both 5% tails shaded.
# calc_ecdf = TRUE together with `quantiles` makes the stat compute the
# `quantile` variable that is mapped to fill (three classes: below the 5%
# quantile, between the two cut points, above the 95% quantile).
# NOTE: with ggplot2 >= 3.3.0, after_stat(quantile) is the preferred
# spelling of ..quantile..; the dot-dot form is kept for older versions.
ggplot(df, aes(x = MAF, y = Trial, fill = factor(..quantile..))) +
  stat_density_ridges(
    geom = "density_ridges_gradient",
    calc_ecdf = TRUE,
    quantiles = c(0.05, 0.95),
    scale = 1
  ) +
  scale_fill_manual(
    values = c("#FDE725FF", "#A0A0A0A0", "#FDE725FF"),
    name = NULL,
    # The third class is the tail above the 95% quantile, i.e. the upper 5%.
    labels = c("lower 5%", "middle 90%", "upper 5%")
  )

enter image description here

如果您仍然想采用分面的方式,一个缺点是stat_density_ridges需要一个y美学映射(aesthetic)。所以我会像下面这样稍微调整主题,让图看起来干净整洁(没人会知道其中存在一个y aes!):

# Faceted variant: one panel per Trial level. stat_density_ridges still needs
# a y aesthetic, so Trial stays mapped to y and the y-axis text and ticks are
# hidden to keep each panel clean.
ggplot(
  data = df,
  mapping = aes(x = MAF, y = Trial, fill = factor(..quantile..))
) +
  stat_density_ridges(
    quantiles = c(0.05, 0.95),
    calc_ecdf = TRUE,
    geom = "density_ridges_gradient",
    scale = 1
  ) +
  scale_fill_manual(
    name = NULL,
    labels = c("lower 5%", "middle 90%", "upper 5%"),
    values = c("#FDE725FF", "#A0A0A0A0", "#FDE725FF")
  ) +
  facet_grid(rows = vars(Trial), scales = "free_y") +
  theme(
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank() # clean up overhead
  )

enter image description here

很显然,您可以根据需要调整颜色和标签,只需确保它们与geom层中设置的quantiles相对应。如果您还有其他问题,请告诉我。