绘制连接点集的线段

时间:2015-05-20 22:01:19

标签: r ggplot2 line facet segment

我想在每个分面(facet)中,把 x 的每个水平上的(两个)点用线段连接起来。下面是一个可重现的示例:

# Reproducible example data (dput output): a data.frame with 100 rows and four
# columns -- integer `frequency`, and the factors `band` (levels "1"-"4"),
# `test` (levels "1", "2") and `knowledge` (levels "1"-"3").
datum <- structure(list(frequency = c(8L, 7L, 6L, 18L, 5L, 11L, 16L, 15L, 
9L, 8L, 8L, 10L, 2L, 20L, 14L, 3L, 6L, 2L, 2L, 11L, 10L, 6L, 
15L, 19L, 18L, 18L, 8L, 2L, 10L, 15L, 12L, 17L, 1L, 18L, 7L, 
8L, 16L, 4L, 9L, 2L, 7L, 3L, 16L, 7L, 18L, 20L, 9L, 10L, 13L, 
2L, 15L, 7L, 3L, 20L, 4L, 15L, 5L, 7L, 9L, 16L, 5L, 8L, 10L, 
10L, 7L, 10L, 10L, 17L, 7L, 8L, 13L, 13L, 16L, 5L, 20L, 18L, 
13L, 19L, 3L, 8L, 14L, 12L, 20L, 2L, 9L, 13L, 7L, 2L, 5L, 5L, 
13L, 9L, 13L, 7L, 9L, 4L, 4L, 20L, 1L, 4L), band = structure(c(2L, 
4L, 2L, 3L, 2L, 1L, 4L, 1L, 2L, 1L, 3L, 4L, 2L, 4L, 3L, 4L, 3L, 
2L, 3L, 2L, 2L, 4L, 2L, 1L, 1L, 2L, 1L, 4L, 4L, 1L, 4L, 4L, 2L, 
1L, 4L, 4L, 3L, 4L, 1L, 1L, 3L, 4L, 1L, 3L, 4L, 1L, 2L, 1L, 1L, 
2L, 2L, 1L, 3L, 4L, 2L, 1L, 2L, 4L, 2L, 2L, 4L, 4L, 2L, 4L, 4L, 
1L, 1L, 4L, 2L, 3L, 4L, 1L, 2L, 4L, 1L, 2L, 4L, 1L, 1L, 3L, 4L, 
4L, 2L, 2L, 2L, 1L, 3L, 2L, 2L, 2L, 3L, 3L, 1L, 3L, 4L, 3L, 3L, 
1L, 3L, 4L), .Label = c("1", "2", "3", "4"), class = "factor"), 
test = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 
2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 
1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 
2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 
2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L
), .Label = c("1", "2"), class = "factor"), knowledge = structure(c(2L, 
3L, 1L, 3L, 1L, 1L, 3L, 3L, 1L, 3L, 1L, 3L, 2L, 2L, 1L, 1L, 
1L, 1L, 3L, 3L, 1L, 2L, 3L, 1L, 1L, 2L, 2L, 1L, 1L, 3L, 2L, 
3L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 3L, 3L, 1L, 1L, 2L, 3L, 
3L, 2L, 2L, 3L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 3L, 1L, 1L, 2L, 
1L, 1L, 2L, 3L, 1L, 1L, 1L, 1L, 3L, 2L, 2L, 1L, 2L, 3L, 2L, 
1L, 2L, 3L, 3L, 2L, 1L, 3L, 1L, 3L, 2L, 1L, 3L, 2L, 2L, 3L, 
1L, 1L, 2L, 1L, 2L, 3L, 1L, 3L, 1L), .Label = c("1", "2", 
"3"), class = "factor")), .Names = c("frequency", "band", 
"test", "knowledge"), row.names = c(NA, -100L), class = "data.frame")

这是我到目前为止的代码:

# Attempt so far: for every knowledge level, plot the mean frequency of each
# test as a dodged point, with one facet column per band. Only points are
# drawn here; no layer connects them yet.
ggplot(data = datum,
       mapping = aes(x = knowledge, y = frequency, colour = test)) +
  stat_summary(geom = 'point',
               fun.y = 'mean',
               size = 3,
               position = position_dodge(width = 0.9)) +
  scale_x_discrete(labels = c('none', 'form', 'meaning')) +
  labs(x = 'self-report knowledge', y = 'number of words (max = 20)') +
  facet_grid(~band)

以图中最左边的分面('1')为例:我想用一条线段连接 none 列中的前测(pretest)和后测(posttest)点,用另一条线段连接 form 列中的前测和后测点,再用一条线段连接 meaning 列中的前测和后测点。我希望在每个分面中都这样做。

我希望这是有道理的,谢谢!

2 个答案:

答案 0 :(得分:1)

我发现过于依赖 ggplot 来做数据处理/汇总往往弊大于利。我不知道如何用线把经过 position_dodge 错开的点连接起来。相反,我会这样做:

library(dplyr)

# Summarise first, plot second: compute the mean frequency for every
# band x knowledge x test cell, then attach readable knowledge labels that
# are used as the facet column headers.
datsum = datum %>%
  group_by(band, knowledge, test) %>%
  summarize(mean = mean(frequency)) %>%
  ungroup %>%
  mutate(knowledge_fac = factor(knowledge, labels = c('none', 'form', 'meaning')))

# One panel per band (rows) and knowledge level (columns). Inside a panel the
# x axis is `test`, so the two test means are joined by a single path.
ggplot(datsum, aes(x = test, y = mean)) +
  # group = band:knowledge gives one path per band/knowledge combination
  geom_path(aes(group = band:knowledge)) +
  # `test` is already a factor; mapping it directly keeps the legend title "test"
  geom_point(aes(color = test)) +
  facet_grid(band ~ knowledge_fac) +
  # The x axis shows `test` (knowledge is shown by the facet columns), so it
  # is labelled accordingly.
  labs(y='number of words (max = 20)', x='test')

答案 1 :(得分:1)

借用 Gregor 在数据整理方面的工作,我认为下面的代码可以满足要求。mutate() 这一步创建了数值偏移量 Test:测试 1 为 -0.1,测试 2 为 0.1;然后把它加到 knowledge 的数值上,得到传给 ggplot2 的数值型 x。Gregor 已经正确地定义了分组,所以其余部分就很直接了。

library(dplyr)

# Mean frequency per band/knowledge/test cell, plus a numeric x position:
# `Test` is 0.1 where test == 2 and -0.1 otherwise, and `Knowledge` is the
# numeric code of `knowledge` shifted by that offset.
datsum <- datum %>%
  group_by(band, knowledge, test) %>%
  summarize(mean = mean(frequency)) %>%
  ungroup() %>%
  mutate(Test = ifelse(test == 2, 0.1, -0.1),
         Knowledge = as.numeric(knowledge) + Test)

# One panel per band in a single row. The black path joins the points that
# share a band/knowledge combination; the points are coloured by test.
ggplot(data = datsum,
       mapping = aes(x = Knowledge, y = mean, colour = test)) +
  geom_path(mapping = aes(group = band:knowledge), colour = "black") +
  geom_point(size = 3) +
  # Put the knowledge labels back on the (now continuous) x axis.
  scale_x_continuous(breaks = 1:3,
                     labels = c("none", "form", "meaning"),
                     limits = c(0.5, 3.5)) +
  scale_colour_manual(values = c("orange", "blue")) +
  labs(x = 'self-report knowledge', y = 'number of words (max = 20)') +
  facet_wrap(~ band, nrow = 1)