所有R专家的好日子!
我有一个用ggplot2
库创建的多面盒子图:
true_1 <- c(1,2,3,4,6,9,10)
false_1 <- c(8,9,11,12,14,15,16,20,24)
true_2 <- c(10,12,13,18,22,24,28,30)
false_2 <- c(25,29,33,35,40,44,50,53,55,60,61)
true_3 <- c(-2,-1,0,0,0,1,1,1,4)
false_3 <- c(1,2,4,5,5,6,6,12)
values <- c(true_1, false_1, true_2, false_2, true_3, false_3)
category <- c(rep("True positives", length(true_1)), rep("False positives", length(false_1)),
rep("True positives", length(true_2)), rep("False positives", length(false_2)),
rep("True positives", length(true_3)), rep("False positives", length(false_3)))
method <- c(rep("Method A", length(true_1) + length(false_1)), rep("Method B", length(true_2) + length(false_2)), rep("Method C", length(true_3) + length(false_3)))
my.data.df <- as.data.frame(t(rbind(values, category, method)))
colnames(my.data.df) <- c("Value", "Category", "Method")
write.table(my.data.df, "my_data.txt")
my.data.2.df <- as.data.frame(read.table("my_data.txt"))
give.n <- function(x) {return(c(y=max(x)+(max(x)-min(x))*0.1, label=length(x)))}
ggplot(my.data.2.df, aes(x=factor(Category, levels=c("True positives","False positives")), y=Value)) +
facet_wrap("Method", scales="free_y") +
stat_boxplot(geom="errorbar", size=1, color="#808080") +
stat_summary(fun.data=give.n, geom="text", fun.y=median, size=6, color="#000000") +
geom_boxplot(fill="#F0F0F0", color="#808080", size=1, outlier.size=5) +
geom_jitter(colour="#4040FF", position=position_jitter(width=0.3), size=4, alpha=0.4) +
theme_classic() +
theme(text=element_text(size=12, face="bold"), axis.title.x=element_blank(), axis.text.x=element_text(size=12, face="bold"), axis.text.y=element_text(size=12)) +
ylab("Score")
我想对每组真阳性数据的一些计算结果进行注释(仅限真阳性):
所以,我的问题是:我怎么能做到这一点?我知道有stat_summary
,geom_text
或annotate
的技巧,我可以部分地达到预期效果,但是,我无法弄清楚,如何同时制作所有内容:
1)注释的位置 - 高于每个箱形图的最大值
2)对类别的特异性(我不希望看到任何注释高于误报)
3)结果的计算是在单独的函数中进行的
P.S。如果是二维小平面网格,应用了 lukeA 的尖端:
true_1 <- c(1,2,3,6,9,10)
true_1_filter <- c(1,2,3,4,6,9,27)
false_1 <- c(8,9,11,12,14,15,16,20,24)
false_1_filter <- c(8,9,11,12,14,15,16,20,24)
true_2 <- c(10,12,13,18,22,24,28,30)
true_2_filter <- c(15,17,19,19,25,30,34,40)
false_2 <- c(25,29,33,35,40,44,50,53,55,60,61)
false_2_filter <- c(30,35,39,42,43,44,60,61)
true_3 <- c(-2,-1,0,0,0,1,1,1,4)
true_3_filter <- c(-2,-1,0,0,0,1,1,1,4)
false_3 <- c(1,2,4,5,5,6,6,12)
false_3_filter <- c(1,2,4,5,5,6,12)
values <- c(true_1, true_1_filter, false_1, false_1_filter, true_2, true_2_filter, false_2, false_2_filter, true_3, true_3_filter, false_3, false_3_filter)
category <- c(rep("True positives", length(true_1) + length(true_1_filter)), rep("False positives", length(false_1) + length(false_1_filter)),
rep("True positives", length(true_2) + length(true_2_filter)), rep("False positives", length(false_2) + length(false_2_filter)),
rep("True positives", length(true_3) + length(true_3_filter)), rep("False positives", length(false_3) + length(false_3_filter)))
method <- c(rep("Method A", length(true_1) + length(true_1_filter) + length(false_1) + length(false_1_filter)),
rep("Method B", length(true_2) + length(true_2_filter) + length(false_2) + length(false_2_filter)),
rep("Method C", length(true_3) + length(true_3_filter) + length(false_3) + length(false_3_filter)))
filter <- c(rep("No filter", length(true_1)), rep("Filter", length(true_1_filter)), rep("No filter", length(false_1)), rep("Filter", length(false_1_filter)),
rep("No filter", length(true_2)), rep("Filter", length(true_2_filter)), rep("No filter", length(false_2)), rep("Filter", length(false_2_filter)),
rep("No filter", length(true_3)), rep("Filter", length(true_3_filter)), rep("No filter", length(false_3)), rep("Filter", length(false_3_filter)))
my.data.df <- as.data.frame(t(rbind(values, category, method, filter)))
colnames(my.data.df) <- c("Value", "Category", "Method", "Filter")
write.table(my.data.df, "my_data.txt")
my.data.2.df <- as.data.frame(read.table("my_data.txt"))
give.n <- function(x) {return(c(y=max(x)+max((max(x)-min(x))*0.1,1), label=length(x)))}
ggplot(my.data.2.df, aes(x=factor(Category, levels=c("True positives","False positives")), y=Value)) +
facet_wrap(Filter ~ Method, scales="free_y") +
stat_boxplot(geom="errorbar", size=1, color="#808080") +
stat_summary(fun.data=give.n, geom="text", fun.y=median, size=6, color="#000000") +
geom_boxplot(fill="#F0F0F0", color="#808080", size=1, outlier.size=5) +
geom_jitter(colour="#4040FF", position=position_jitter(width=0.3), size=4, alpha=0.4) +
theme_classic() +
theme(text=element_text(size=12, face="bold"), axis.title.x=element_blank(), axis.text.x=element_text(size=12, face="bold"), axis.text.y=element_text(size=12)) +
ylab("Score") +
geom_text(data = data.frame(Method = levels(my.data.2.df$Method), Filter = levels(my.data.2.df$Filter), Category = "True positives",
y = aggregate(Value ~ Method + Filter, data = my.data.2.df[my.data.2.df$Category == "True positives", ], FUN = max)$Value,
label = paste0("Result ", LETTERS[1:3], " {}")),
aes(y = y, label = label), color = "orange", vjust = -3, fontface = "bold")
所以,仍有两个不同的问题:
1)图中值的顺序与geom_text
内的新数据框不匹配
2)有些标签超出了情节(vjust
不能顺利运作)
答案 0 :(得分:2)
假设您将地图存储在p
中,您可以创建另一个数据框:
p + geom_text(data = data.frame(Method = levels(my.data.2.df$Method),
Category = "True positives",
y = aggregate(Value ~ Method, data = my.data.2.df[my.data.2.df$Category == "True positives", ], FUN = max)$Value,
label = paste0("Result ", LETTERS[1:3], " {}")),
aes(y = y,
label = label),
color = "orange",
vjust = -3,
fontface = "bold")
结果:
更新
关于OP的评论,我建议调整标签的y坐标,而不是使用vjust
。一种方法:
f <- function(my.data.2.df, yAdj = .2) {
tmp <- transform(expand.grid(Method = levels(my.data.2.df$Method),
Filter = levels(my.data.2.df$Filter)),
Category = "True positives",
y = aggregate(Value ~ Method + Filter,
data = my.data.2.df[my.data.2.df$Category == "True positives", ],
FUN = max)$Value)
tmp$label <- paste0("Result ", LETTERS[1:nrow(tmp)], " {}")
y.adjust <- as.vector(diff(t(aggregate(Value ~ Method + Filter,
data = my.data.2.df,
FUN = range)$Value))) * yAdj
tmp$y <- tmp$y + y.adjust
return(tmp)
}
p + geom_text(data = f(my.data.2.df),
aes(y = y, label = label), color = "orange", fontface = "bold")