在 ggplot2 中使用分组变量为折线图添加误差线

时间:2019-04-29 00:38:56

标签: r ggplot2

我正在尝试用 ggplot 创建带误差线的折线图,其中 x 轴是 “before”(之前)和 “after”(之后)两个标签,y 轴是各个分组对应的值。

我有以下数据:

# Example data for the question, written as a bare `structure(list(...))`
# expression (it looks like dput() output). The expression is not assigned to
# a name here; the plotting code further down refers to this table as `data`.
#
# 16 rows with columns: politics (factor: Democrat / Republican),
# outgroup_pairing, n, variable ("mean_outgroup_feelings" or
# "sd_outgroup_feelings"), value, post (factor: before / after), condition.
# The object has class "grouped_df" with vars = "politics".
# NOTE(review): the `vars` / `indices` / `group_sizes` / `labels` attributes
# look like the grouped_df layout of an older dplyr release -- confirm before
# relying on the grouping with a current dplyr.
structure(list(politics = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 
2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("Democrat", 
"Republican"), class = "factor"), outgroup_pairing = c("ingroup", 
"outgroup", "ingroup", "outgroup", "ingroup", "outgroup", "ingroup", 
"outgroup", "ingroup", "outgroup", "ingroup", "outgroup", "ingroup", 
"outgroup", "ingroup", "outgroup"), n = c(70L, 40L, 80L, 40L, 
70L, 40L, 80L, 40L, 70L, 40L, 80L, 40L, 70L, 40L, 80L, 40L), 
    variable = c("mean_outgroup_feelings", "mean_outgroup_feelings", 
    "mean_outgroup_feelings", "mean_outgroup_feelings", "sd_outgroup_feelings", 
    "sd_outgroup_feelings", "sd_outgroup_feelings", "sd_outgroup_feelings", 
    "mean_outgroup_feelings", "mean_outgroup_feelings", "mean_outgroup_feelings", 
    "mean_outgroup_feelings", "sd_outgroup_feelings", "sd_outgroup_feelings", 
    "sd_outgroup_feelings", "sd_outgroup_feelings"), value = c(20.0887142857143, 
    19.49375, 23.62375, 24.59675, 2.35688161103258, 3.44710671484444, 
    2.25773534858986, 3.01420267102051, 20.6534285714286, 25.6335, 
    24.34575, 31.38, 2.43592948564522, 4.15701197160308, 2.3931516332475, 
    3.24980877149442), post = structure(c(1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("before", 
    "after"), class = "factor"), condition = c("Democrat_ingroup_pairing", 
    "Democrat_outgroup_pairing", "Republican_ingroup_pairing", 
    "Republican_outgroup_pairing", "Democrat_ingroup_pairing", 
    "Democrat_outgroup_pairing", "Republican_ingroup_pairing", 
    "Republican_outgroup_pairing", "Democrat_ingroup_pairing", 
    "Democrat_outgroup_pairing", "Republican_ingroup_pairing", 
    "Republican_outgroup_pairing", "Democrat_ingroup_pairing", 
    "Democrat_outgroup_pairing", "Republican_ingroup_pairing", 
    "Republican_outgroup_pairing")), row.names = c(NA, -16L), vars = "politics", drop = TRUE, class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), indices = list(c(0L, 1L, 4L, 
5L, 8L, 9L, 12L, 13L), c(2L, 3L, 6L, 7L, 10L, 11L, 14L, 15L)), group_sizes = c(8L, 
8L), biggest_group_size = 8L, labels = structure(list(politics = structure(1:2, .Label = c("Democrat", 
"Republican"), class = "factor")), row.names = c(NA, -2L), vars = "politics", drop = TRUE, class = "data.frame"))

我可以像这样绘制主要结果:

# Main plot: `post` (before/after) on the x axis, `value` on the y axis, and
# one coloured line plus points per `condition`.
ggplot(
  data,
  mapping = aes(x = post, y = value, colour = condition, group = condition)
) +
  geom_line() +
  geom_point() +
  # All text labels in one place: title, axis titles and legend title.
  labs(
    title = "Before and After Measurements",
    x = "Before and After Measures",
    y = "Value",
    colour = "Pairing Type"
  ) +
  # Centre the plot title.
  theme(plot.title = element_text(hjust = 0.5))

我尝试先构造一个单独的数据框,再给这些折线加上误差线:

# Reshape long -> wide: every distinct entry of `variable` becomes its own
# column, filled with the matching entries of `value`.
error_bars <- tidyr::spread(data, key = variable, value = value)

然后把下面这个图层添加到上面的 ggplot 调用链中:

# Error-bar layer meant to be appended to the ggplot chain: mean +/- sd, drawn
# only for rows whose `condition` contains "outgroup".
geom_errorbar(
  mapping = aes(
    x = post,
    ymin = mean_outgroup_feelings - sd_outgroup_feelings,
    ymax = mean_outgroup_feelings + sd_outgroup_feelings,
    group = condition
  ),
  data = filter(error_bars, str_detect(condition, "outgroup")),
  colour = "black",
  size = 1,
  width = 0.1
)

但是这样行不通,因为这个数据框中没有名为 `value` 的对象(列)。实现这一目标的最佳方法是什么?

1 个答案:

答案 0 :(得分:1)

您要找的可能是类似下面这样的做法?

(我添加了facet_wrap,以提高可见度,因为线条彼此之间非常接近。)

# Reshape long -> wide so that the mean and the sd of each condition/post
# combination sit side by side in one row.
error_bars <- tidyr::spread(data, variable, value)

# Shorter variable names. Rename by column name instead of by position
# (previously `[6:7]`), so a change in column order or count cannot silently
# rename the wrong columns; a missing column stops with an error instead.
long_names <- c("mean_outgroup_feelings", "sd_outgroup_feelings")
stopifnot(all(long_names %in% names(error_bars)))
names(error_bars)[match(long_names, names(error_bars))] <- c("mean", "sd")

# One panel per condition; the error bars span mean - sd to mean + sd and take
# their x position, colour and grouping from the plot-level aesthetics.
ggplot(error_bars,
       aes(x = post, y = mean, color = condition, group = condition)) +
  geom_line() +
  geom_point() +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd)) +
  facet_wrap(~condition)

[图片:plot]