我正在尝试构建一个复杂的图形,在箱线图上叠加各个数据点,以同时显示摘要统计信息和原始数据的离散程度。我有 2 个问题,按重要性排序:
代码:
library(ggplot2)
library(dplyr)
# Box plot of highway mileage (hwy) by cylinder count, split by drive train
# (drv), with the raw observations overlaid as jittered points whose shape
# encodes the fuel type (fl).

# Treat cylinder count as a categorical variable so each value gets its own
# discrete position on the x axis.
mpg$cyl <- as.factor(mpg$cyl)

mpg %>%
  # Keep fuel types "p" and "r" and drop 5-cylinder cars. The conditions are
  # separate filter() arguments (combined with AND). The earlier
  # `a | b & c` form parsed as `a | (b & c)` because `&` binds tighter than
  # `|`, so the cylinder exclusion only applied to the "r" rows.
  filter(fl %in% c("p", "r"), cyl != "5") %>%
  # Random subsample of 100 rows; call set.seed() beforehand if the plot must
  # be reproducible.
  sample_n(100) %>%
  ggplot(aes(cyl, hwy, fill = drv)) +
  # Whisker end caps, dodged by the same amount as the boxes.
  stat_boxplot(
    geom = "errorbar",
    width = 0.5,
    position = position_dodge(1)
  ) +
  # Outlier glyphs are suppressed because every observation is drawn by the
  # point layer below.
  geom_boxplot(position = position_dodge(1), outlier.shape = NA) +
  # `group = drv` keeps the points in the same dodge groups as the boxes.
  # Without it, mapping `shape = fl` splits the implicit grouping into
  # drv x fl, and the points are dodged into slots that do not line up with
  # their box.
  geom_point(
    aes(fill = drv, shape = fl, group = drv),
    color = "black",
    show.legend = TRUE,
    alpha = 0.5,
    size = 3,
    position = position_jitterdodge(dodge.width = 1)
  ) +
  # Shapes 21 and 23 are fillable point shapes, so the `fill` aesthetic
  # colours the points.
  scale_shape_manual(values = c(21, 23))