如何使用ggplot为多个分布创建分面图

时间:2017-05-09 00:37:30

标签: r ggplot2 dplyr

我有以下数据框:


library(tidyverse)
set.seed(1)
# Example data: ten genes with three non-negative measurement columns.
# The columns are drawn in the order X, Y, Z, which determines the values
# obtained from the seed set above.
df <- tibble(
  genes = paste0("Gene_", letters[1:10]),
  X = abs(rnorm(10, 0, 1)),
  Y = abs(rnorm(10, 0, 2)),
  Z = abs(rnorm(10, 0, 4))
)

df
#> # A tibble: 10 × 4
#>     genes         X          Y         Z
#>     <chr>     <dbl>      <dbl>     <dbl>
#> 1  Gene_a 0.6264538 3.02356234 3.6759095
#> 2  Gene_b 0.1836433 0.77968647 3.1285452
#> 3  Gene_c 0.8356286 1.24248116 0.2982599
#> 4  Gene_d 1.5952808 4.42939977 7.9574068
#> 5  Gene_e 0.3295078 2.24986184 2.4793030
#> 6  Gene_f 0.8204684 0.08986722 0.2245150
#> 7  Gene_g 0.4874291 0.03238053 0.6231820
#> 8  Gene_h 0.7383247 1.88767242 5.8830095
#> 9  Gene_i 0.5757814 1.64244239 1.9126002
#> 10 Gene_j 0.3053884 1.18780264 1.6717662

下面是为上面每个非基因列(X、Y、Z)估计得到的分布参数:


# Fitted parameters, one row per (variable, distribution) pair: the first
# three rows are the normal fits for X, Y, Z and the last three the
# log-normal fits. Built as a plain list carrying the tibble class vector.
alldf <- structure(
  list(
    var = rep(c("X", "Y", "Z"), times = 2),
    mod_est_mean = c(
      0.649790620181318, 1.65651567796795, 2.78544973796179,
      -0.594317687136244, -0.116217058012966, 0.518234267967891
    ),
    mod_est_sd = c(
      0.37898907459421, 1.27340261798159, 2.38265470031565,
      0.583177003946691, 1.49404482354149, 1.14803152575931
    ),
    logLik = c(
      -4.48690631961252, -16.6063107770219, -22.8715381956973,
      -2.85356316184894, -17.0420856382274, -20.7522156015569
    ),
    dist_name = rep(c("normal", "lognormal"), each = 3)
  ),
  row.names = c(NA, -6L),
  class = c("tbl_df", "tbl", "data.frame")
)
alldf
#>   var mod_est_mean mod_est_sd     logLik dist_name
#> 1   X    0.6497906  0.3789891  -4.486906    normal
#> 2   Y    1.6565157  1.2734026 -16.606311    normal
#> 3   Z    2.7854497  2.3826547 -22.871538    normal
#> 4   X   -0.5943177  0.5831770  -2.853563 lognormal
#> 5   Y   -0.1162171  1.4940448 -17.042086 lognormal
#> 6   Z    0.5182343  1.1480315 -20.752216 lognormal

我想要做的是为X、Y、Z创建分面图。每个分面包含3条曲线(实际数据、正态拟合、对数正态拟合)。

我目前只能为X做到这一点:
# Plot just one distribution (df$X): kernel density estimate of the data
# (black points) with the fitted normal (red) and log-normal (blue) curves.
x <- df$X
den <- density(x)
df_x <- data.frame(x = den$x, y = den$y)
df_x
norm_param <- alldf %>% filter(var == "X", dist_name == "normal")
lognorm_param <- alldf %>% filter(var == "X", dist_name == "lognormal")
ggplot(df_x, aes(x = x, y = y)) +
  geom_point(size = 3) +
  # Inside aes(), `x` is the column of df_x, so df_x$x is not needed.
  geom_line(
    aes(y = dnorm(
      x,
      mean = norm_param$mod_est_mean,
      sd = norm_param$mod_est_sd
    )),
    color = "red", size = 1
  ) +
  # dlnorm() is the log-normal density on the original scale: unlike
  # dnorm(log(x), ...) it includes the 1/x factor, and it returns 0 rather
  # than NaN where the density grid extends to x <= 0.
  geom_line(
    aes(y = dlnorm(
      x,
      meanlog = lognorm_param$mod_est_mean,
      sdlog = lognorm_param$mod_est_sd
    )),
    color = "blue", size = 1
  ) +
  theme_classic()
  # And how can I include legend for 3 lines?

enter image description here

黑色(实际数据),红色(正态拟合),蓝色(对数正态拟合)

但我怎么能为facet做到这一点?

2 个答案:

答案 0 :(得分:2)

这是一种方法。基本上,您应该尽可能把一个长格式的数据框传给ggplot,所以先从数据中创建长数据框。我在这里用 lapply 对 alldf 的每一行进行计算,以获得您想要的3个独立数据集,给它们标记各自代表的内容,然后把这些数据集绑定成一个长数据集。

# Build one long data frame holding, for every row of alldf (one fitted
# distribution for one variable), the kernel density of that variable's data
# (x, y) and the fitted density evaluated on the same grid (d_norm).
#
# NOTE: `df` read inside the function is still the original gene table; the
# assignment replaces it with the long plotting data only after lapply()
# has finished, so this chunk must be run against the original `df`.
df <- lapply(
  X = seq_len(nrow(alldf)),
  FUN = function(i) {
    # Density of the variable this fit belongs to, so each facet shows its
    # own data rather than the density of X.
    den <- density(df[[alldf$var[i]]])
    fitted <- switch(
      alldf$dist_name[i],
      normal = dnorm(
        den$x,
        mean = alldf$mod_est_mean[i],
        sd = alldf$mod_est_sd[i]
      ),
      # dlnorm() includes the 1/x factor that dnorm(log(x), ...) omits and
      # returns 0 (not NaN) where the grid extends to x <= 0.
      lognormal = dlnorm(
        den$x,
        meanlog = alldf$mod_est_mean[i],
        sdlog = alldf$mod_est_sd[i]
      ),
      stop("unknown dist_name: ", alldf$dist_name[i], call. = FALSE)
    )
    data.frame(
      x = den$x,
      y = den$y,
      d_norm = fitted,
      var = alldf$var[i],
      dist_name = alldf$dist_name[i],
      stringsAsFactors = FALSE
    )
  }
)
df <- do.call(rbind, df)

head(df)
           x           y     d_norm var dist_name
1 -0.3642300 0.002889036 0.02936093   X    normal
2 -0.3593232 0.003142633 0.03039330   X    normal
3 -0.3544164 0.003412660 0.03145670   X    normal
4 -0.3495096 0.003697873 0.03255185   X    normal
5 -0.3446028 0.004015857 0.03367948   X    normal
6 -0.3396959 0.004349725 0.03484033   X    normal

现在您已经得到了整洁的长格式数据,可以把它传给 ggplot:把 dist_name 作为 aes() 的颜色参数,让它按 dist_name 着色;并让它按 var 列的值进行分面(本例中使用按“列”排布的 facet_grid)。

# One panel per variable: kernel-density points plus the fitted curves,
# coloured by the distribution each curve comes from.
ggplot(df, aes(x = x)) +
  geom_point(aes(y = y), size = 3) +
  geom_line(aes(y = d_norm, colour = dist_name), size = 1) +
  facet_grid(. ~ var) +
  theme_classic()

enter image description here

答案 1 :(得分:0)

虽然晚了几年,但万一有人来这里寻找如何做到这一点,以下方法可行:

library(dplyr)
library(tidyr)
library(ggplot2)
# Reshape to long format: one row per (gene, variable) measurement, with the
# variable in `name` and the measurement in `value`.
long_df <- df %>%
  pivot_longer(where(is.numeric))

# Fit a normal and a log-normal to one variable's observed values and return
# both fitted densities evaluated on a grid spanning the data.
fit_curves <- function(value) {
  grid <- seq(min(value), max(value), length.out = 101)
  norm_est <- MASS::fitdistr(value, "normal")$estimate
  lnorm_est <- MASS::fitdistr(value, "log-normal")$estimate
  rbind(
    data.frame(
      fit = "Normal",
      value = grid,
      fitted = dnorm(grid, norm_est[["mean"]], norm_est[["sd"]]),
      stringsAsFactors = FALSE
    ),
    data.frame(
      fit = "Log-Normal",
      value = grid,
      fitted = dlnorm(grid, lnorm_est[["meanlog"]], lnorm_est[["sdlog"]]),
      stringsAsFactors = FALSE
    )
  )
}

# stat_function() hands `fun` an evenly spaced grid over the x range, not the
# observations, so fitting inside `fun` would fit the grid. The fits are
# therefore computed here, per variable, from the actual data.
fitted_df <- bind_rows(
  lapply(split(long_df$value, long_df$name), fit_curves),
  .id = "name"
)

ggplot(long_df, aes(value)) +
  geom_density(aes(color = "empirical"), size = 1) +
  geom_line(data = fitted_df, aes(y = fitted, color = fit)) +
  theme_minimal() +
  facet_wrap(name ~ ., scales = "free")

生成以下图: enter image description here