将散点图添加到ggplot中的系数图中

时间:2015-09-29 23:37:01

标签: r ggplot2

我正在尝试创建一个系数图,其中包含数据点的散点图。下面列出了部分数据:

print(dat)

                   lower    upper      mode    condition   obsMean p100 p102 p103 p104 p105
ob.h.Black.650  0.693595 0.950305 0.8849853  h.Black.650 0.8122302 0.85 1.00 0.85 0.85 0.85
ob.h.White.650  0.649625 0.930375 0.8306119  h.White.650 0.7852518 0.70 0.95 0.80 0.80 0.90
ob.h.Black.800  0.833255 0.993105 0.9701335  h.Black.800 0.9035971 0.95 1.00 0.95 0.95 1.00
ob.h.White.800  0.802110 0.983230 0.9469604  h.White.800 0.8762590 0.95 0.95 0.80 1.00 0.85
ob.h.Black.950  0.869375 0.995800 0.9804180  h.Black.950 0.9165468 0.90 1.00 0.85 1.00 1.00
ob.h.White.950  0.862895 0.991750 0.9694811  h.White.950 0.9046763 1.00 1.00 0.95 1.00 0.90
ob.h.Black.1100 0.879340 0.996550 0.9885382 h.Black.1100 0.9323741 1.00 1.00 0.90 0.75 0.85
ob.h.White.1100 0.853110 0.994405 0.9773240 h.White.1100 0.9140288 0.95 0.95 1.00 0.90 0.90

我的代码生成如下所示的输出:

# Coefficient plot drawn directly from the wide data frame `dat`:
# black points/bars for `mode` with `lower`/`upper`, red points for `obsMean`,
# and one separate jittered blue layer for each of the columns p100 ... p105.
ggplot(dat, aes(x = reorder(condition, 1:8), y = mode)) + # x: condition, levels kept in row order (1:8); y: mode
  geom_point(size = 4) + # large black dots at the modes
  geom_pointrange(aes(ymin = lower, ymax = upper)) + # bars from lower to upper (the 95% HDI)
  geom_point(aes(x = reorder(condition, 1:8), y = obsMean), size = 4, colour = "red") + # red dots at obsMean
  geom_point(aes(x = reorder(condition, 1:8), y = p100), # x: condition, y: column p100 (one participant's mean)
    position = position_jitter(width = .2, height = 0), # jitter horizontally only
    size = 2, colour = "blue") +  # small blue dots
  geom_point(aes(x = reorder(condition, 1:8), y = p102), # same layer again, for column p102
    position = position_jitter(width = .2, height = 0), # jitter horizontally only
    size = 2, colour = "blue") +  # small blue dots
  geom_point(aes(x = reorder(condition, 1:8), y = p103), # same layer again, for column p103
    position = position_jitter(width = .2, height = 0), # jitter horizontally only
    size = 2, colour = "blue") +  # small blue dots
  geom_point(aes(x = reorder(condition, 1:8), y = p104), # same layer again, for column p104
    position = position_jitter(width = .2, height = 0), # jitter horizontally only
    size = 2, colour = "blue") +  # small blue dots
  geom_point(aes(x = reorder(condition, 1:8), y = p105), # same layer again, for column p105
    position = position_jitter(width = .2, height = 0), # jitter horizontally only
    size = 2, colour = "blue") +  # small blue dots
  coord_cartesian(ylim = c(.6, 1)) + # show the y range .6 to 1 (zooms without dropping data)
  ylab("Hit Rate") + # y-axis label
  theme_bw() +  # black-and-white base theme
  theme(axis.title.x = element_blank(), # no x-axis title
    panel.background = element_blank(), # no panel background
    panel.grid = element_blank(), # no gridlines
    panel.border = element_rect(colour = "black", fill = NA, size = 1))  # black border around the panel

生成了下面这张图。

enter image description here

如您所见,代码效率低下。对于我想要在条件均值和置信区间上叠加的每组点,我必须输入另一行代码。在完整数据集中重复139次,这特别麻烦。我想知道是否有更有效的方法将个别重复叠加在均值和CI上。

1 个答案:

答案 0 :(得分:4)

正如评论中已经提到的,您应该将数据重塑为长格式。这可以使用 reshape2、data.table 或 tidyr 来完成:
library(reshape2)
# Stack the per-participant columns p100, p102, p103, p104 and p105 into a
# `variable` / `value` pair; all remaining columns are kept as id columns.
dat2 <- melt(dat, measure.vars = paste0("p", c(100, 102:105)))

library(data.table)
# Option 1: list the measure columns explicitly (same result as the
# reshape2 call)
dat2 <- melt(setDT(dat), measure.vars = paste0("p", c(100, 102:105)))
# Option 2: select the measure columns by a regular expression on their names
dat2 <- melt(setDT(dat), measure.vars = patterns("^p"))

library(tidyr)
# Reshape to long form with pivot_longer(), which replaces the superseded
# gather(). The p-columns are stacked into `variable` (the former column name)
# and `value` (the cell content); every other column is repeated per row.
# The result is a tibble ordered by original row rather than by p-column,
# which makes no difference to the plots.
dat2 <- dat %>%
  pivot_longer(
    cols = c(p100, p102, p103, p104, p105),
    names_to = "variable",
    values_to = "value"
  )

接下来,按正确的顺序设置 condition 的因子水平(factor levels):

# Fix the display order of the conditions. The levels are generated as
# h.Black.650, h.White.650, h.Black.800, h.White.800,
# h.Black.950, h.White.950, h.Black.1100, h.White.1100.
dat2$condition <- factor(
  dat2$condition,
  levels = paste("h", c("Black", "White"),
                 rep(c(650, 800, 950, 1100), each = 2), sep = ".")
)

最后,您可以按如下方式简化绘图代码:

# Coefficient plot from the long-format data: modes with their lower/upper
# bars in black, obsMean in red, and every individual `value` as a small
# horizontally jittered blue dot drawn by a single layer.
ggplot(data = dat2, mapping = aes(x = condition, y = mode)) +
  geom_point(size = 4) +
  geom_pointrange(mapping = aes(ymin = lower, ymax = upper)) +
  # x is inherited from the plot-level mapping; only y is overridden below
  geom_point(mapping = aes(y = obsMean), colour = "red", size = 4) +
  geom_point(
    mapping = aes(y = value),
    colour = "blue",
    size = 2,
    position = position_jitter(width = 0.2, height = 0)
  ) +
  # upper limit slightly above 1 so dots at exactly 1 are not clipped
  coord_cartesian(ylim = c(0.6, 1.01)) +
  labs(y = "Hit Rate") +
  theme_bw() +
  # NOTE(review): ggplot2 3.4.0+ prefers `linewidth` over `size` in
  # element_rect(); `size` is kept here to match the original call.
  theme(
    panel.border = element_rect(fill = NA, colour = "black", size = 1),
    panel.grid = element_blank(),
    panel.background = element_blank(),
    axis.title.x = element_blank()
  )

得到下面这张图:

enter image description here

此外,您还可以考虑按 p 变量(即 variable 列)为这些点着色:

# Same coefficient plot, but the jittered dots are coloured by `variable`
# (the original p-column each value came from) instead of a fixed blue.
ggplot(data = dat2, mapping = aes(x = condition, y = mode)) +
  geom_point(size = 4) +
  geom_pointrange(mapping = aes(ymin = lower, ymax = upper)) +
  # x is inherited from the plot-level mapping; only y is overridden below
  geom_point(mapping = aes(y = obsMean), colour = "red", size = 4) +
  geom_point(
    mapping = aes(y = value, color = variable),
    size = 2,
    position = position_jitter(width = 0.2, height = 0)
  ) +
  # upper limit slightly above 1 so dots at exactly 1 are not clipped
  coord_cartesian(ylim = c(0.6, 1.01)) +
  labs(y = "Hit Rate") +
  theme_bw() +
  # NOTE(review): ggplot2 3.4.0+ prefers `linewidth` over `size` in
  # element_rect(); `size` is kept here to match the original call.
  theme(
    panel.border = element_rect(fill = NA, colour = "black", size = 1),
    panel.grid = element_blank(),
    panel.background = element_blank(),
    axis.title.x = element_blank()
  )

结果如下:

enter image description here