我已遵循以下问题的答案:Tukey test results on geom_boxplot with facet_grid
效果很好,但我还想在各个分面(facet)之间进行比较。换句话说,先对所有结果统一标注显著性字母,然后再把它们拆分到各个分面中(我同时有水平和垂直两个方向的分面)。我该怎么做?另外,如何重新排列字母,使其从第一个分面的第一个变量以“a”开始,第二个变量为“b”,依此类推?我尝试了以下做法,但没有成功,因为结果并没有按顺序排列。
# Attempted fix for the letter order: `ordered = TRUE` asks TukeyHSD to order
# the factor levels by increasing sample mean before taking differences.
# `ANOVA` is the fitted aov object (it is not defined at this point in the post).
TUKEY <- TukeyHSD(ANOVA, ordered = TRUE)
下面是可复现的代码(绘图代码取自上面的链接),数据的生成方式取自此链接(http://sape.inf.usi.ch/quick-reference/ggplot2/facet)
# Simulated benchmark data: 11 observations (obs 0..10) for every combination
# of benchmark, garbage collector (gc), optimisation setting (opt) and heap size.

# Fix the RNG state so the simulated timings, and therefore the Tukey letters
# derived from them, are identical on every run.
set.seed(42)

d <- expand.grid(
  obs = 0:10,
  benchmark = c("antlr", "bloat", "chart"),
  gc = c("CopyMS", "GenCopy", "GenImmix"),
  opt = c("on", "off", "valid"),
  heapSize = seq(from = 1.5, to = 4, by = 0.5)
)

# Baseline: exponential noise (rate 0.01) on top of a constant 1000.
d$time <- rexp(nrow(d), 0.01) + 1000
# Add 100 per unit of distance between the heap size and 3.
d$time <- d$time + abs(d$heapSize - 3) * 100
# Rows with opt == "on" are 200 faster ...
d$time[d$opt == "on"] <- d$time[d$opt == "on"] - 200
# ... except for the "bloat" benchmark, which gets 190 of that back.
d$time[d$opt == "on" & d$benchmark == "bloat"] <-
  d$time[d$opt == "on" & d$benchmark == "bloat"] + 190
# Build the compact-letter-display labels for one term of a Tukey HSD result.
#
# Arguments:
#   TUKEY    The full TukeyHSD result. It is not used by the body; it is kept
#            so that existing calls of the form generate_label_df(TUKEY, TUKEY$x)
#            keep working.
#   variable The comparison matrix of a single term (e.g. TUKEY$opt): one row
#            per pairwise comparison, row names of the form "levelA-levelB",
#            and the value to letter (the adjusted p-value for TukeyHSD output)
#            in column 4.
#
# Returns a data frame with one row per factor level and two columns:
#   Letters   - the letter group(s) assigned by multcompLetters()
#   treatment - the factor level name
# Rows are sorted by `treatment` so they line up with the boxplot order.
generate_label_df <- function(TUKEY, variable) {
  # Extract the values from column 4 and re-attach the comparison names
  # explicitly. When `variable` has a single row (a factor with two levels),
  # `variable[, 4]` collapses to a length-one vector and `[` drops the name
  # because both dimnames apply; multcompLetters() needs those names to
  # recover the factor levels.
  tukey_levels <- stats::setNames(variable[, 4], rownames(variable))
  tukey_labels <- data.frame(multcompLetters(tukey_levels)["Letters"])

  # Put the labels in the same order as in the boxplot.
  tukey_labels$treatment <- rownames(tukey_labels)
  tukey_labels[order(tukey_labels$treatment), ]
}
# Draw a faceted boxplot of `time` by `opt` (rows: gc, columns: benchmark) and
# annotate every box with Tukey HSD letters computed separately inside each
# facet.
#
# Arguments:
#   df  Data frame with columns `opt`, `time`, `gc` and `benchmark`.
#
# Returns the ggplot object. As a side effect the Tukey HSD table of every
# facet is printed.
#
# Relies on ggplot2, on scale_color_npg() (not defined here; presumably from
# ggsci) and on generate_label_df().
TUKEYplot <- function(df) {
  p <- ggplot(data = df) +
    aes(x = opt, y = time, colour = opt) +
    geom_boxplot() +
    facet_grid(gc ~ benchmark) +
    theme_linedraw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    ylim(min(df$time), max(df$time) + 0.05) +
    labs(x = "type", y = "time", color = "state") +
    theme(strip.background = element_rect(colour = "black", fill = "white")) +
    theme(strip.text = element_text(colour = "black", size = 12)) +
    theme(axis.text = element_text(size = 12)) +
    theme(legend.text = element_text(size = 12)) +
    theme(legend.title = element_text(size = 12, face = "bold")) +
    theme(axis.title = element_text(size = 14, face = "bold")) +
    scale_color_npg()

  gc_levels <- as.character(unique(df$gc))
  benchmark_levels <- as.character(unique(df$benchmark))

  # Collect the label rows of every facet and add them as ONE text layer at
  # the end, instead of stacking one geom_text layer per facet on the plot.
  label_frames <- vector("list", length(gc_levels) * length(benchmark_levels))
  k <- 0L
  for (facetk2 in gc_levels) {
    for (facetk in benchmark_levels) {
      subdf <- subset(df, df$benchmark == facetk & df$gc == facetk2)
      model <- lm(time ~ opt, data = subdf)
      ANOVA <- aov(model)
      # Tukey test to study each pair of treatments within this facet.
      TUKEY <- TukeyHSD(ANOVA)
      print(TUKEY)
      labels <- generate_label_df(TUKEY, TUKEY$`opt`)
      names(labels) <- c("Letters", "opt")
      # Only `time` is needed for the label height. `. ~ opt` would run
      # quantile() over every other column (factors coerced to integer codes)
      # and fail outright on a character column.
      yvalue <- aggregate(time ~ opt, data = subdf, FUN = quantile, probs = 0.75)
      final <- merge(labels, yvalue)
      # Tag the rows with their facet so ggplot draws them in the right panel.
      final$benchmark <- facetk
      final$gc <- facetk2
      k <- k + 1L
      label_frames[[k]] <- final
    }
  }
  all_labels <- do.call(rbind, label_frames)

  # Letters sit just above and to the right of each box's 75th percentile.
  p + geom_text(
    data = all_labels, aes(x = opt, y = time, label = Letters),
    vjust = -1.2, hjust = -.5, show.legend = FALSE, size = 5
  )
}
# Build the annotated boxplot from the simulated data `d` ...
p1<-TUKEYplot(d)
# ... and display it (auto-printed at top level).
p1
更新:视觉帮助:
原始图:
期望得到的图(局部):
答案 0 :(得分:0)
我终于找到了办法,所以把答案发上来!基本思路是:把 Tukey 检验的计算移到循环之外,对交互项做方差分析(ANOVA),再对其应用 Tukey 检验。然后把标签拆分成多列(请确保数据的取值中不包含“:”;如果包含,可以先用 revalue 之类的函数替换这些取值),最后按数据的各个水平(即各个分面)逐一取出对应的标签。
# Draw a faceted boxplot of `time` by `opt` (rows: gc, columns: benchmark) and
# annotate every box with Tukey HSD letters computed ONCE over the full
# gc:benchmark:opt interaction, so letters are comparable across facets.
#
# Arguments:
#   df  Data frame with columns `opt`, `time`, `gc` and `benchmark`. The level
#       names must not contain ":" because the interaction labels are split
#       on that character.
#
# Returns the ggplot object.
#
# Relies on ggplot2, on `%>%` and separate() (presumably magrittr/tidyr), on
# scale_color_npg() (not defined here; presumably from ggsci) and on
# generate_label_df().
TUKEYplot <- function(df) {
  p <- ggplot(data = df) +
    aes(x = opt, y = time, colour = opt) +
    geom_boxplot() +
    facet_grid(gc ~ benchmark) +
    theme_linedraw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    ylim(min(df$time), max(df$time) + 0.05) +
    labs(x = "type", y = "time", color = "state") +
    theme(strip.background = element_rect(colour = "black", fill = "white")) +
    theme(strip.text = element_text(colour = "black", size = 12)) +
    theme(axis.text = element_text(size = 12)) +
    theme(legend.text = element_text(size = 12)) +
    theme(legend.title = element_text(size = 12, face = "bold")) +
    theme(axis.title = element_text(size = 14, face = "bold")) +
    scale_color_npg()

  model <- lm(time ~ gc * benchmark * opt, data = df)
  ANOVA <- aov(model)
  # Tukey test to study each pair of treatments across all facets.
  TUKEY <- TukeyHSD(ANOVA)
  all_labels <- generate_label_df(TUKEY, TUKEY$`gc:benchmark:opt`)
  # Split the "gc:benchmark:opt" treatment name back into its three factors.
  sep_labels <- all_labels %>%
    separate(col = treatment, into = c("gc", "benchmark", "opt"), sep = ":")

  # Label height: 75th percentile of `time` for every gc/benchmark/opt cell,
  # computed in one grouped aggregate. Only `time` is aggregated; `. ~ opt`
  # would run quantile() over every other column as well.
  yvalue <- aggregate(
    time ~ gc + benchmark + opt,
    data = df, FUN = quantile, probs = 0.75
  )
  # One merge on all three keys replaces the per-facet subset/merge loop.
  final <- merge(sep_labels, yvalue, by = c("gc", "benchmark", "opt"))

  # Single text layer; the gc/benchmark columns route each row to its panel.
  p + geom_text(
    data = final, aes(x = opt, y = time, label = Letters),
    vjust = -1.2, hjust = -.5, show.legend = FALSE, size = 5
  )
}
生成的图像:(由于我的信誉不足,所以无法嵌入图像。)