stat_summary中的平均线错误地连接了X轴上的因子组

时间:2018-07-17 12:18:34

标签: r ggplot2

为了便于复现问题,先给出一些示例数据:

# Example data set for the plots: 200 rows with a character `group` code and
# a numeric `num` measurement that contains missing values.
data <- structure(
  list(
    # Group labels, written as consecutive runs; the run lengths sum to 200.
    # "ERH" appears in two separate runs.
    group = rep(
      c("AUS", "ERE", "ERH", "NEA", "ERH", "PAR"),
      times = c(41, 16, 5, 122, 9, 7)
    ),
    num = c(
      # AUS (rows 1-41)
      165, 186, 95,
      168, 170, 180, 126, 131, 77, 116, 158, 100, 134, 119, 132, 117,
      110, 121, 107, 170, 110, 111, 112, 113, 114, 115, 116, 135, 92,
      103, 112, 117, 123, 146, 130, 138, 82, 62, 57, 75, 82,
      # ERE (rows 42-57)
      114, 127,
      138, 92, 94, 96, 110, 90, 92, 93, 95, 97, 99, 101, 106, 100,
      # ERH (rows 58-62)
      95, 110, 97, NA, 153,
      # NEA (rows 63-184)
      147, 149, 167, 167, 186, 150, 151, 198,
      143, 154, 119, 158, 145, 172, 177, 139, 136, 168, 128, 137, 137,
      138, 152, 127, 116, 156, 114, 157, 145, 114, 143, 151, 144, 121,
      142, 138, 122, 139, 140, 139, 161, 134, 144, 148, 145, 156, 146,
      146, 138, 133, 135, 90, 119, 141, 98, 74, 75, 81, 84, 94, 102,
      102, 112, 158, 162, 184, 202, 118, NA, NA, 133, NA, NA, NA, 122,
      NA, NA, 115, 81, 85, 99, 130, 137, 92, 92, 93, 94, 96, 100, 105,
      105, 107, 109, 119, 134, 138, 138, 143, 147, 159, 105, 107, 109,
      119, 70, 74, 78, 79, 82, 84, 86, 91, 95, 96, 103, 106, 107, 114,
      128, 128, 140, 154,
      # ERH (rows 185-193)
      NA, NA, NA, NA, 192, NA, NA, NA, NA,
      # PAR (rows 194-200)
      113, NA, NA, NA, NA, NA, 91
    )
  ),
  row.names = c(NA, 200L),
  class = "data.frame"
)

由于我希望X轴上的因子具有特定顺序,因此我定义了对象order

# Desired left-to-right order of the groups on the x axis; the plot call
# passes this vector to scale_x_discrete(limits = ...).
# Note: the variable shares its name with base R's order() function.
order <- c("AUS", "PAR", "ERH", "ERE", "NEA")

现在,我运行ggplot函数:

library(ggplot2)
# Violin plot plus jittered points of `num` per `group`, with the group means
# overlaid as a dashed line and as points.
ggplot(data, aes(x=group)) + 
  # The x-axis order is imposed through the scale limits only; `group` itself
  # is still a character column in `data`.
  scale_x_discrete(limits=c(order)) +
  # NOTE(review): alpha is supplied inside aes() in the next two layers, so it
  # is handled as a mapped aesthetic rather than a fixed setting -- confirm
  # this is intended.
  geom_violin(aes(y=num, colour=group, alpha=0.30, fill=group), size=0.3) + 
  geom_jitter(aes(y=num, colour=group, fill=group, alpha=0.5), position = position_jitter(width = .1), size=1) +
  # Dashed line through the per-group means; group = 1 is given both inside
  # aes() and as a layer parameter. According to the accompanying text, this
  # is the layer whose line does not follow the x-axis order set above.
  stat_summary(aes(y=num, group=1), fun.y=mean, colour="darkred", geom="line", group=1, lwd=0.3, lty=2) +
  # Per-group means drawn as points.
  stat_summary(aes(y=num), fun.y=mean, colour ="darkred",  geom="point")

我得到了下面这张图: enter image description here

如您所见,均值点的位置是正确的,但均值线并非如此。如何让这条线按照X轴上显示的顺序依次穿过各个组?

1 个答案:

答案 0 :(得分:3)

如果您不介意修改数据,请将data$group转换为因子(factor),并按所需顺序指定其水平(levels)。

# Encode the desired x-axis order in the data itself: `group` becomes a factor
# whose levels are listed in plotting order, so the factor levels (rather than
# scale limits) determine the order of the groups.
data$group <- factor(data$group, levels = c("AUS", "PAR", "ERH", "ERE", "NEA"))

library(ggplot2)
# Shared aesthetics are declared once in ggplot(); fixed settings (alpha,
# widths, colours) are passed outside aes().
ggplot(data, aes(group, num, colour = group, fill = group)) +
  # `linewidth` replaces `size` for line widths (ggplot2 >= 3.4.0).
  geom_violin(alpha = 0.30, linewidth = 0.3) +
  geom_jitter(alpha = 0.5, position = position_jitter(width = 0.1), size = 1) +
  # group = 1 puts all group means into a single group so that one dashed line
  # connects them. `fun` replaces the deprecated `fun.y` (ggplot2 >= 3.3.0).
  stat_summary(
    fun = mean, colour = "darkred", geom = "line", group = 1,
    linewidth = 0.3, linetype = 2
  ) +
  # Group means drawn as points.
  stat_summary(fun = mean, colour = "darkred", geom = "point")

enter image description here