如何在R中结合ggpubr与facet_wrap,为数据框的每个分组分别标注p值

时间:2019-04-01 17:02:53

标签: r anova

我有一个看起来像这样的数据:

# Example data in long format (dput output): 84 rows with four columns.
#   Time    - factor with levels "24", "36", "48", "72"
#   id      - integer ids 1..21, repeated once per Samples level
#   Samples - factor with levels WT_Ago2_800, WT_Ago2_400, WT_Ago2_200, WT_Ago4_800
#   Size    - numeric measurement that is compared between Samples
melted.df <- structure(list(Time = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 1L, 1L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 
4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L), .Label = c("24", 
"36", "48", "72"), class = "factor"), id = c(1L, 2L, 3L, 4L, 
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 
19L, 20L, 21L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 1L, 2L, 3L, 
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 
18L, 19L, 20L, 21L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L), Samples = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L), .Label = c("WT_Ago2_800", "WT_Ago2_400", "WT_Ago2_200", 
"WT_Ago4_800"), class = "factor"), Size = c(0, 0, 0, 0, 0, 0, 
0.3, 0, 0, 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.8, 0.5, 0, 0, 
0, 0, 0, 0, 0.1, 0.65, 0.2, 0.85, 0.725, 0.575, 0.1, 1.1, 0.9, 
1.325, 1, 0.8, 0.5, 2.2, 1.65, 0, 0, 0, 0, 0, 0, 0.825, 1.175, 
0.1, 0.55, 0.85, 0.85, 1.1, 1.4, 0.6, 0.95, 1.15, 0.975, 2.35, 
1.15, 2.1, 0, 0, 0, 0, 0, 0, 0.65, 1.4, 0.55, 0.1, 0.7, 1.1, 
0.95, 1.85, 0.85, 0.1, 1.5, 1.25, 1.8, 1.75, 2.15)), row.names = c(NA, 
-84L), class = "data.frame")

此数据包含4个时间点(24、36、48和72小时)。我想用下面的代码,对time.levels中的每个时间点分别计算p值(保存在stat.test中),并把它们标注到facet_wrap生成的对应分面上。如果检查i = 1的情况,会发现没有有效的p值,因此无需在该分面上标注;而i = 2时则应把p值标注到图上。问题是我无法把p值标注到各自对应的分面上——所有分面显示的都是同一组p值。我该如何解决?

代码:

library(devtools)
# install_github("https://github.com/kassambara/rstatix")
library(rstatix)  # https://github.com/kassambara/rstatix
library(stringi)
library(ggpubr)
# Question code (kept as posted): tries to annotate each Time facet with its
# own Tukey HSD adjusted p-values, but every facet ends up with the same ones.
# The distinct time points ("24", "36", "48", "72") to iterate over.
time.levels <- levels(melted.df$Time)
stat.test <- NULL
for (i in 1:length(time.levels)){
  # One-way ANOVA + Tukey HSD on the rows of the i-th time point only.
  # stat.test is overwritten on every iteration, so it only ever holds the
  # comparisons of the current time point.
  stat.test <- aov(Size ~ Samples, data = melted.df[melted.df$Time == time.levels[i],]) %>%
  tukey_hsd()
# stat.test <- rbind(stat.test, tmp.stat)
 # The plot is built from the full melted.df and facetted by Time, while
 # stat.test comes from a model without Time, so presumably nothing ties its
 # rows to a facet and the same annotations are drawn in every panel (the
 # problem described in the question).
 bp <-  ggboxplot(melted.df, x = "Samples", y = "Size") +
    facet_wrap(vars(Time))+
    stat_pvalue_manual(
      stat.test, label = "p.adj",
      # Six bracket heights; presumably one per pairwise comparison among the
      # four Samples levels.
      y.position = c(2, 2.5, 3, 3.5, 3.8, 4)
    )
 # NOTE: a bare object name is not auto-printed inside a for loop; print(bp)
 # would be needed to actually draw the plot here.
 bp

}

2 个答案:

答案 0 :(得分:1)

注意:在Time == 24L的子集中,Size列的所有值均为零:

> filter(melted.df, Time == 24L) %>% select(Size) %>% summary
      Size  
 Min.   :0  
 1st Qu.:0  
 Median :0  
 Mean   :0  
 3rd Qu.:0  
 Max.   :0  

如果您仍然希望继续,则应为每个时间点单独绘图,然后使用gridExtra::grid.arrange将这些图排列在一起:

library(gridExtra)
# Build one boxplot per time point, each annotated with the Tukey HSD adjusted
# p-values computed from that time point's rows only, then lay the plots out
# together on one page.
bp <- vector(mode = "list", length = length(time.levels))
for (i in seq_len(length(time.levels))) {
  # Rows of the i-th time level; the same subset feeds both the test and the
  # plot so that annotations and boxes refer to the same data.
  sdf <- melted.df[melted.df$Time == time.levels[i], ]

  # One-way ANOVA on the subset, followed by Tukey's pairwise comparisons.
  stat.test <- tukey_hsd(aov(Size ~ Samples, data = sdf))

  # facet_wrap on a single-level subset only adds the Time strip label.
  bp[[i]] <- ggboxplot(sdf, x = "Samples", y = "Size") +
    facet_wrap(vars(Time)) +
    stat_pvalue_manual(
      stat.test,
      label = "p.adj",
      y.position = c(2, 2.5, 3, 3.5, 3.8, 4)
    )
}
# Arrange all per-time plots in a single grid.
grid.arrange(grobs = bp)

请注意,您必须将子集data.frame sdf用作ggboxplot的输入。

答案 1 :(得分:1)

您不需要使用gridExtra::grid.arrange

这是一个干净的解决方案。

library(rstatix) # latest version
library(ggpubr)  # latest version
# Run Tukey HSD on Size ~ Samples separately within each Time level.
# Presumably the grouping column carried in the result is what lets
# stat_pvalue_manual() place each comparison in its own facet; confirm
# against the ggpubr documentation.
stat.test <- tukey_hsd(group_by(melted.df, Time), Size ~ Samples)

# Single facetted boxplot (one panel per Time) with the adjusted p-values
# drawn at fixed bracket heights.
ggboxplot(
  melted.df,
  x = "Samples",
  y = "Size",
  facet.by = "Time"
) +
  stat_pvalue_manual(
    data = stat.test,
    label = "p.adj",
    y.position = c(2, 2.5, 3, 3.5, 3.8, 4)
  )