跟进this question并为了完整起见,我修改了接受的答案并对结果图进行了自定义,但我仍面临一些重要问题。
总之,我正在做箱形图,反映了Kruskal-Wallis和成对Wilcoxon测试比较的重要性。我想用星号替换p值数字,只显示重要的比较,将垂直间距减少到最大值。
基本上我想做this,但是由于增加了方面的问题,这会搞砸一切。
到目前为止,我已经开发了一个非常不错的MWE,但它仍然存在问题......
library(reshape2)
library(ggplot2)
library(gridExtra)
library(tidyverse)
library(data.table)
library(ggsignif)
library(RColorBrewer)
data(iris)
iris$treatment <- rep(c("A","B"), length(iris$Species)/2)
mydf <- melt(iris, measure.vars=names(iris)[1:4])
mydf$treatment <- as.factor(mydf$treatment)
mydf$variable <- factor(mydf$variable, levels=sort(levels(mydf$variable)))
mydf$both <- factor(paste(mydf$treatment, mydf$variable), levels=(unique(paste(mydf$treatment, mydf$variable))))
# Change data to reduce number of statistically significant differences
set.seed(2)
mydf <- mydf %>% mutate(value=rnorm(nrow(mydf)))
##
##FIRST TEST BOTH
#Kruskal-Wallis
addkw <- as.data.frame(mydf %>% group_by(Species) %>%
summarize(p.value = kruskal.test(value ~ both)$p.value))
#addkw$p.adjust <- p.adjust(addkw$p.value, "BH")
a <- combn(levels(mydf$both), 2, simplify = FALSE)
#new p.values
pv.final <- data.frame()
for (gr in unique(mydf$Species)){
for (i in 1:length(a)){
tis <- a[[i]] #variable pair to test
as <- subset(mydf, Species==gr & both %in% tis)
pv <- wilcox.test(value ~ both, data=as)$p.value
ddd <- data.table(as)
asm <- as.data.frame(ddd[, list(value=mean(value)), by=list(both=both)])
asm2 <- dcast(asm, .~both, value.var="value")[,-1]
pf <- data.frame(group1=paste(tis[1], gr), group2=paste(tis[2], gr), mean.group1=asm2[,1], mean.group2=asm2[,2], FC.1over2=asm2[,1]/asm2[,2], p.value=pv)
pv.final <- rbind(pv.final, pf)
}
}
#pv.final$p.adjust <- p.adjust(pv.final$p.value, method="BH")
pv.final$map.signif <- ifelse(pv.final$p.value > 0.05, "", ifelse(pv.final$p.value > 0.01,"*", "**"))
cols <- colorRampPalette(brewer.pal(length(unique(mydf$Species)), "Set1"))
myPal <- cols(length(unique(mydf$Species)))
#Function to get a list of plots to use as "facets" with grid.arrange
plot.list=function(mydf, pv.final, addkw, a, myPal){
mylist <- list()
i <- 0
for (sp in unique(mydf$Species)){
i <- i+1
mydf0 <- subset(mydf, Species==sp)
addkw0 <- subset(addkw, Species==sp)
pv.final0 <- pv.final[grep(sp, pv.final$group1), ]
num.signif <- sum(pv.final0$p.value <= 0.05)
P <- ggplot(mydf0,aes(x=both, y=value)) +
geom_boxplot(aes(fill=Species)) +
stat_summary(fun.y=mean, geom="point", shape=5, size=4) +
facet_grid(~Species, scales="free", space="free_x") +
scale_fill_manual(values=myPal[i]) + #WHY IS COLOR IGNORED?
geom_text(data=addkw0, hjust=0, size=4.5, aes(x=0, y=round(max(mydf0$value, na.rm=TRUE)+0.5), label=paste0("KW p=",p.value))) +
geom_signif(test="wilcox.test", comparisons = a[which(pv.final0$p.value<=0.05)],#I can use "a"here
map_signif_level = F,
vjust=0,
textsize=4,
size=0.5,
step_increase = 0.05)
if (i==1){
P <- P + theme(legend.position="none",
axis.text.x=element_text(size=20, angle=90, hjust=1),
axis.text.y=element_text(size=20),
axis.title=element_blank(),
strip.text.x=element_text(size=20,face="bold"),
strip.text.y=element_text(size=20,face="bold"))
} else{
P <- P + theme(legend.position="none",
axis.text.x=element_text(size=20, angle=90, hjust=1),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.title=element_blank(),
strip.text.x=element_text(size=20,face="bold"),
strip.text.y=element_text(size=20,face="bold"))
}
#WHY USING THE CODE BELOW TO CHANGE NUMBERS TO ASTERISKS I GET ERRORS?
#P2 <- ggplot_build(P)
#P2$data[[3]]$annotation <- rep(subset(pv.final0, p.value<=0.05)$map.signif, each=3)
#P <- plot(ggplot_gtable(P2))
mylist[[sp]] <- list(num.signif, P)
}
return(mylist)
}
p.list <- plot.list(mydf, pv.final, addkw, a, myPal)
y.rng <- range(mydf$value)
# Get the highest number of significant p-values across all three "facets"
height.factor <- 0.3
max.signif <- max(sapply(p.list, function(x) x[[1]]))
# Lay out the three plots as facets (one for each Species), but adjust so that y-range is same for each facet. Top of y-range is adjusted using max_signif.
png(filename="test.png", height=800, width=1200)
grid.arrange(grobs=lapply(p.list, function(x) x[[2]] +
scale_y_continuous(limits=c(y.rng[1], y.rng[2] + height.factor*max.signif))),
ncol=length(unique(mydf$Species)), top="Random title", left="Value") #HOW TO CHANGE THE SIZE OF THE TITLE AND THE Y AXIS TEXT?
#HOW TO ADD A COMMON LEGEND?
dev.off()
它产生以下图:
正如你所看到的,有一些问题,最明显的是:
1 - 着色因某种原因无效
2 - 我似乎无法使用星号更改注释
我想要更像这样的东西(样机):
所以我们需要:
1 - 让着色工作
2 - 显示星号而非数字
...并获胜:
3 - 制作一个共同的传奇
4 - 将Kruskal-Wallis线置于顶部
5 - 更改标题和y轴文字的大小(和对齐方式)
重要提示
我很感激我的代码尽可能完整,即使它不是最漂亮的,因为我仍然需要使用像“CNb”或“pv.final”这样的中间对象。
解决方案应该很容易转移到其他情况;请考虑单独测试“变量”,而不是“两者”......在这种情况下,我们有6个“刻面”(垂直和水平),一切都变得更加紧张......
我做了另一个MWE:
##NOW TEST MEASURE, TO GET VERTICAL AND HORIZONTAL FACETS
addkw <- as.data.frame(mydf %>% group_by(treatment, Species) %>%
summarize(p.value = kruskal.test(value ~ variable)$p.value))
#addkw$p.adjust <- p.adjust(addkw$p.value, "BH")
a <- combn(levels(mydf$variable), 2, simplify = FALSE)
#new p.values
pv.final <- data.frame()
for (tr in levels(mydf$treatment)){
for (gr in levels(mydf$Species)){
for (i in 1:length(a)){
tis <- a[[i]] #variable pair to test
as <- subset(mydf, treatment==tr & Species==gr & variable %in% tis)
pv <- wilcox.test(value ~ variable, data=as)$p.value
ddd <- data.table(as)
asm <- as.data.frame(ddd[, list(value=mean(value, na.rm=T)), by=list(variable=variable)])
asm2 <- dcast(asm, .~variable, value.var="value")[,-1]
pf <- data.frame(group1=paste(tis[1], gr, tr), group2=paste(tis[2], gr, tr), mean.group1=asm2[,1], mean.group2=asm2[,2], FC.1over2=asm2[,1]/asm2[,2], p.value=pv)
pv.final <- rbind(pv.final, pf)
}
}
}
#pv.final$p.adjust <- p.adjust(pv.final$p.value, method="BH")
# set signif level
pv.final$map.signif <- ifelse(pv.final$p.value > 0.05, "", ifelse(pv.final$p.value > 0.01,"*", "**"))
plot.list2=function(mydf, pv.final, addkw, a, myPal){
mylist <- list()
i <- 0
for (sp in unique(mydf$Species)){
for (tr in unique(mydf$treatment)){
i <- i+1
mydf0 <- subset(mydf, Species==sp & treatment==tr)
addkw0 <- subset(addkw, Species==sp & treatment==tr)
pv.final0 <- pv.final[grep(paste(sp,tr), pv.final$group1), ]
num.signif <- sum(pv.final0$p.value <= 0.05)
P <- ggplot(mydf0,aes(x=variable, y=value)) +
geom_boxplot(aes(fill=Species)) +
stat_summary(fun.y=mean, geom="point", shape=5, size=4) +
facet_grid(treatment~Species, scales="free", space="free_x") +
scale_fill_manual(values=myPal[i]) + #WHY IS COLOR IGNORED?
geom_text(data=addkw0, hjust=0, size=4.5, aes(x=0, y=round(max(mydf0$value, na.rm=TRUE)+0.5), label=paste0("KW p=",p.value))) +
geom_signif(test="wilcox.test", comparisons = a[which(pv.final0$p.value<=0.05)],#I can use "a"here
map_signif_level = F,
vjust=0,
textsize=4,
size=0.5,
step_increase = 0.05)
if (i==1){
P <- P + theme(legend.position="none",
axis.text.x=element_blank(),
axis.text.y=element_text(size=20),
axis.title=element_blank(),
axis.ticks.x=element_blank(),
strip.text.x=element_text(size=20,face="bold"),
strip.text.y=element_text(size=20,face="bold"))
}
if (i==4){
P <- P + theme(legend.position="none",
axis.text.x=element_text(size=20, angle=90, hjust=1),
axis.text.y=element_text(size=20),
axis.title=element_blank(),
strip.text.x=element_text(size=20,face="bold"),
strip.text.y=element_text(size=20,face="bold"))
}
if ((i==2)|(i==3)){
P <- P + theme(legend.position="none",
axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.title=element_blank(),
axis.ticks.x=element_blank(),
axis.ticks.y=element_blank(),
strip.text.x=element_text(size=20,face="bold"),
strip.text.y=element_text(size=20,face="bold"))
}
if ((i==5)|(i==6)){
P <- P + theme(legend.position="none",
axis.text.x=element_text(size=20, angle=90, hjust=1),
axis.text.y=element_blank(),
#axis.ticks.y=element_blank(), #WHY SPECIFYING THIS GIVES ERROR?
axis.title=element_blank(),
axis.ticks.y=element_blank(),
strip.text.x=element_text(size=20,face="bold"),
strip.text.y=element_text(size=20,face="bold"))
}
#WHY USING THE CODE BELOW TO CHANGE NUMBERS TO ASTERISKS I GET ERRORS?
#P2 <- ggplot_build(P)
#P2$data[[3]]$annotation <- rep(subset(pv.final0, p.value<=0.05)$map.signif, each=3)
#P <- plot(ggplot_gtable(P2))
sptr <- paste(sp,tr)
mylist[[sptr]] <- list(num.signif, P)
}
}
return(mylist)
}
p.list2 <- plot.list2(mydf, pv.final, addkw, a, myPal)
y.rng <- range(mydf$value)
# Get the highest number of significant p-values across all three "facets"
height.factor <- 0.5
max.signif <- max(sapply(p.list2, function(x) x[[1]]))
# Lay out the three plots as facets (one for each Species), but adjust so that y-range is same for each facet. Top of y-range is adjusted using max_signif.
png(filename="test2.png", height=800, width=1200)
grid.arrange(grobs=lapply(p.list2, function(x) x[[2]] +
scale_y_continuous(limits=c(y.rng[1], y.rng[2] + height.factor*max.signif))),
ncol=length(unique(mydf$Species)), top="Random title", left="Value") #HOW TO CHANGE THE SIZE OF THE TITLE AND THE Y AXIS TEXT?
#HOW TO ADD A COMMON LEGEND?
dev.off()
产生以下情节:
现在颜色问题变得更加明显,小平面高度不均匀,并且应该使用冗余小平面条文本来完成。
我被困在这一点上,所以会感激任何帮助。很抱歉这个问题很长,但我认为它几乎就在那里!谢谢!
答案 0 :(得分:3)
您可以尝试以下操作。由于您的代码非常繁忙,而且对于我来说太难理解,我建议采用不同的方法。我试图避免循环并尽可能地使用tidyverse
。因此,首先我创建了您的数据。然后计算kruskal wallis测试,因为这在ggsignif
内是不可能的。之后,我将使用geom_signif
绘制所有p.values。最后,将删除无关紧要的内容并增加步骤。
1-使着色工作完成
2-显示星号而不是数字已完成
......胜利:
3-制作一个共同的传奇完成
4-将Kruskal-Wallis线置于顶部完成,我将值置于底部
5-更改标题和y轴文字的大小(和对齐方式)完成
library(tidyverse)
library(ggsignif)
# 1. your data
set.seed(2)
df <- as.tbl(iris) %>%
mutate(treatment=rep(c("A","B"), length(iris$Species)/2)) %>%
gather(key, value, -Species, -treatment) %>%
mutate(value=rnorm(n())) %>%
mutate(key=factor(key, levels=unique(key))) %>%
mutate(both=interaction(treatment, key, sep = " "))
# 2. Kruskal test
KW <- df %>%
group_by(Species) %>%
summarise(p=round(kruskal.test(value ~ both)$p.value,2),
y=min(value),
x=1) %>%
mutate(y=min(y))
# 3. Plot
P <- df %>%
ggplot(aes(x=both, y=value)) +
geom_boxplot(aes(fill=Species)) +
facet_grid(~Species) +
ylim(-3,7)+
theme(axis.text.x = element_text(angle=45, hjust=1)) +
geom_signif(comparisons = combn(levels(df$both),2,simplify = F),
map_signif_level = T) +
stat_summary(fun.y=mean, geom="point", shape=5, size=4) +
xlab("") +
geom_text(data=KW,aes(x, y=y, label=paste0("KW p=",p)),hjust=0) +
ggtitle("Plot") + ylab("This is my own y-lab")
# 4. remove not significant values and add step increase
P_new <- ggplot_build(P)
P_new$data[[2]] <- P_new$data[[2]] %>%
filter(annotation != "NS.") %>%
group_by(PANEL) %>%
mutate(index=(as.numeric(group[drop=T])-1)*0.5) %>%
mutate(y=y+index,
yend=yend+index) %>%
select(-index) %>%
as.data.frame()
# the final plot
plot(ggplot_gtable(P_new))
和使用两个方面的类似方法
# --------------------
# 5. Kruskal
KW <- df %>%
group_by(Species, treatment) %>%
summarise(p=round(kruskal.test(value ~ both)$p.value,2),
y=min(value),
x=1) %>%
ungroup() %>%
mutate(y=min(y))
# 6. Plot with two facets
P <- df %>%
ggplot(aes(x=key, y=value)) +
geom_boxplot(aes(fill=Species)) +
facet_grid(treatment~Species) +
ylim(-5,7)+
theme(axis.text.x = element_text(angle=45, hjust=1)) +
geom_signif(comparisons = combn(levels(df$key),2,simplify = F),
map_signif_level = T) +
stat_summary(fun.y=mean, geom="point", shape=5, size=4) +
xlab("") +
geom_text(data=KW,aes(x, y=y, label=paste0("KW p=",p)),hjust=0) +
ggtitle("Plot") + ylab("This is my own y-lab")
# 7. remove not significant values and add step increase
P_new <- ggplot_build(P)
P_new$data[[2]] <- P_new$data[[2]] %>%
filter(annotation != "NS.") %>%
group_by(PANEL) %>%
mutate(index=(as.numeric(group[drop=T])-1)*0.5) %>%
mutate(y=y+index,
yend=yend+index) %>%
select(-index) %>%
as.data.frame()
# the final plot
plot(ggplot_gtable(P_new))
编辑。
关于p.adjust
的{{1}}需求,您可以自行设置功能并直接在geom_signif()
内调用。
wilcox.test.BH.adjusted <- function(x,y,n){
tmp <- wilcox.test(x,y)
tmp$p.value <- p.adjust(tmp$p.value, n = n,method = "BH")
tmp
}
geom_signif(comparisons = combn(levels(df$both),2,simplify = F),
map_signif_level = T, test = "wilcox.test.BH.adjusted",
test.args = list(n=8))
挑战在于知道最终会有多少次独立测试。然后,您可以自己设置n
。我在这里使用了8
。但这可能是错误的。
答案 1 :(得分:1)
一直知道在循环中构造ggplots会产生令人困惑的结果,对于点 1 的解释,我将引用this question和其他许多内容。还有一个关于在现场评估ggplot对象的提示,例如:通过print
。
重新点 2 ,你很接近,有点调试,试用和错误帮助。以下是plot.list
的完整代码:
plot.list=function(mydf, pv.final, addkw, a, myPal){
mylist <- list()
i <- 0
for (sp in unique(mydf$Species)){
i <- i+1
mydf0 <- subset(mydf, Species==sp)
addkw0 <- subset(addkw, Species==sp)
pv.final0 <- pv.final[grep(sp, pv.final$group1), ]
num.signif <- sum(pv.final0$p.value <= 0.05)
P <- ggplot(mydf0,aes(x=both, y=value)) +
geom_boxplot(aes(fill=Species)) +
stat_summary(fun.y=mean, geom="point", shape=5, size=4) +
facet_grid(~Species, scales="free", space="free_x") +
scale_fill_manual(values=myPal[i]) +
geom_text(data=addkw0, hjust=0, size=4.5, aes(x=0, y=round(max(mydf0$value, na.rm=TRUE)+0.5), label=paste0("KW p=",p.value))) +
geom_signif(test="wilcox.test", comparisons = a[which(pv.final0$p.value<=0.05)],#I can use "a"here
map_signif_level = F,
vjust=0,
textsize=4,
size=0.5,
step_increase = 0.05)
if (i==1){
P <- P + theme(legend.position="none",
axis.text.x=element_text(size=20, angle=90, hjust=1),
axis.text.y=element_text(size=20),
axis.title=element_blank(),
strip.text.x=element_text(size=20,face="bold"),
strip.text.y=element_text(size=20,face="bold"))
} else{
P <- P + theme(legend.position="none",
axis.text.x=element_text(size=20, angle=90, hjust=1),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.title=element_blank(),
strip.text.x=element_text(size=20,face="bold"),
strip.text.y=element_text(size=20,face="bold"))
}
P2 <- ggplot_build(P)
P2$data[[4]]$annotation <- rep(subset(pv.final0, p.value<=0.05)$map.signif, each=3)
P <- ggplot_gtable(P2)
mylist[[sp]] <- list(num.signif, P)
}
return(mylist)
}
请注意,我们无法再通过ggplot语义修改绘图,因为我们已经应用了ggplot_build
/ ggplot_gtable
,因此无法再进行缩放修改。如果要保留它,请在plot.list
函数内移动它。所以,改为
grid.arrange(grobs=lapply(p.list, function(x) x[[2]]),
ncol=length(unique(mydf$Species)), top="Random title", left="Value")
产量
当然,这不是一个完整的解决方案,但我希望有所帮助。