我正在创建一个自定义图表,以使用geom_density
可视化变量的分布。我为自定义值添加了3条垂直线,分别是第5个百分点和第95个百分点。
如何为这些行添加标签?
我尝试使用geom_text
,但是我不知道如何为x和y变量设置参数
library(ggplot2)
ggplot(dataset, aes(x = dataset$`Estimated percent body fat`)) +
geom_density() +
geom_vline(aes(xintercept = dataset$`Estimated percent body fat`[12]),
color = "red", size = 1) +
geom_vline(aes(xintercept = quantile(dataset$`Estimated percent body fat`,
0.05, na.rm = TRUE)),
color = "grey", size = 0.5) +
geom_vline(aes(xintercept = quantile(dataset$`Estimated percent body fat`,
0.95, na.rm = TRUE)),
color="grey", size=0.5) +
geom_text(aes(x = dataset$`Estimated percent body fat`[12],
label = "Custom", y = 0),
colour = "red", angle = 0)
我想获得以下信息:
这就是我所能获得的https://i.imgur.com/thSQwyg.png
这是我的数据集的前50行:
structure(list(`Respondent sequence number` = c(21029L, 21034L,
21043L, 21056L, 21067L, 21085L, 21087L, 21105L, 21107L, 21109L,
21110L, 21125L, 21129L, 21138L, 21141L, 21154L, 21193L, 21195L,
21206L, 21215L, 21219L, 21221L, 21232L, 21239L, 21242L, 21247L,
21256L, 21258L, 21287L, 21310L, 21325L, 21367L, 21380L, 21385L,
21413L, 21418L, 21420L, 21423L, 21427L, 21432L, 21437L, 21441L,
21444L, 21453L, 21466L, 21467L, 21477L, 21491L, 21494L, 21495L
), `Estimated percent body fat` = c(NA, 7.2, NA, NA, 24.1, 25.1,
30.2, 23.6, 24.3, 31.4, NA, 14.1, 20.5, NA, 23.1, 30.6, 21, 20.9,
NA, 24, 26.7, 16.6, NA, 26.9, 16.9, 21.3, 15.9, 27.4, 13.9, NA,
20, NA, 12.8, NA, 33.8, 18.1, NA, NA, 28.4, 10.9, 38.1, 33, 39.3,
15.9, 32.7, NA, 20.4, 16.8, NA, 29)), row.names = c(NA, 50L), class =
"data.frame")
答案 0 :(得分:2)
首先,我建议使用干净的列名。
dat <- dataset
names(dat) <- tolower(gsub("\\s", "\\.", names(dat)))
根据基数R可以执行以下操作。关键是,您可以存储分位数和自定义位置,以在以后使用它们作为坐标,从而为您提供动态定位。我不确定ggplot
是否/如何做到这一点。
plot(density(dat$estimated.percent.body.fat, na.rm=TRUE), ylim=c(0, .05),
main="Density curve")
abline(v=c1 <- dat$estimated.percent.body.fat[12], col="red")
abline(v=q1 <- quantile(dat$estimated.percent.body.fat, .05, na.rm=TRUE), col="grey")
abline(v=q2 <- quantile(dat$estimated.percent.body.fat, .95, na.rm=TRUE), col="grey")
text(c1 + 4, .05, c(expression("" %<-% "custom")), cex=.8)
text(q1 - 5.5, .025, c(expression("5% percentile" %->% "")), cex=.8)
text(q2 + 5.5, .025, c(expression("" %<-% "95% percentile")), cex=.8)
注意:如果您不喜欢箭头,例如"5% percentile"
而不是c(expression("5% percentile" %->% ""))
。
或者在ggplot
中,您可以使用annotate
。
library(ggplot2)
ggplot(dataset, aes(x = dataset$`Estimated percent body fat`)) +
geom_density() +
geom_vline(aes(xintercept = dataset$`Estimated percent body fat`[12]),
color = "red", size = 1) +
geom_vline(aes(xintercept = quantile(dataset$`Estimated percent body fat`,
0.05, na.rm = TRUE)),
color = "grey", size = 0.5) +
geom_vline(aes(xintercept = quantile(dataset$`Estimated percent body fat`,
0.95, na.rm = TRUE)),
color="grey", size=0.5) +
annotate("text", x=16, y=.05, label="custom") +
annotate("text", x=9.5, y=.025, label="5% percentile") +
annotate("text", x=38, y=.025, label="95% percentile")
注意 ,在两种解决方案中,结果(即确切的标签位置)取决于您的导出尺寸。要了解如何控制此功能,请采取看看How to save a plot as image on the disk?。
数据
dataset <- structure(list(`Respondent sequence number` = c(21029L, 21034L,
21043L, 21056L, 21067L, 21085L, 21087L, 21105L, 21107L, 21109L,
21110L, 21125L, 21129L, 21138L, 21141L, 21154L, 21193L, 21195L,
21206L, 21215L, 21219L, 21221L, 21232L, 21239L, 21242L, 21247L,
21256L, 21258L, 21287L, 21310L, 21325L, 21367L, 21380L, 21385L,
21413L, 21418L, 21420L, 21423L, 21427L, 21432L, 21437L, 21441L,
21444L, 21453L, 21466L, 21467L, 21477L, 21491L, 21494L, 21495L
), `Estimated percent body fat` = c(NA, 7.2, NA, NA, 24.1, 25.1,
30.2, 23.6, 24.3, 31.4, NA, 14.1, 20.5, NA, 23.1, 30.6, 21, 20.9,
NA, 24, 26.7, 16.6, NA, 26.9, 16.9, 21.3, 15.9, 27.4, 13.9, NA,
20, NA, 12.8, NA, 33.8, 18.1, NA, NA, 28.4, 10.9, 38.1, 33, 39.3,
15.9, 32.7, NA, 20.4, 16.8, NA, 29)), row.names = c(NA, 50L), class =
"data.frame")