我正在尝试创建一个系数图,其中包含数据点的散点图。下面列出了部分数据:
print(dat)
lower upper mode condition obsMean p100 p102 p103 p104 p105
ob.h.Black.650 0.693595 0.950305 0.8849853 h.Black.650 0.8122302 0.85 1.00 0.85 0.85 0.85
ob.h.White.650 0.649625 0.930375 0.8306119 h.White.650 0.7852518 0.70 0.95 0.80 0.80 0.90
ob.h.Black.800 0.833255 0.993105 0.9701335 h.Black.800 0.9035971 0.95 1.00 0.95 0.95 1.00
ob.h.White.800 0.802110 0.983230 0.9469604 h.White.800 0.8762590 0.95 0.95 0.80 1.00 0.85
ob.h.Black.950 0.869375 0.995800 0.9804180 h.Black.950 0.9165468 0.90 1.00 0.85 1.00 1.00
ob.h.White.950 0.862895 0.991750 0.9694811 h.White.950 0.9046763 1.00 1.00 0.95 1.00 0.90
ob.h.Black.1100 0.879340 0.996550 0.9885382 h.Black.1100 0.9323741 1.00 1.00 0.90 0.75 0.85
ob.h.White.1100 0.853110 0.994405 0.9773240 h.White.1100 0.9140288 0.95 0.95 1.00 0.90 0.90
我的代码生成如下所示的输出:
# Coefficient plot built from the wide data frame `dat`: the per-condition mode
# with interval bars, the observed mean in red, and each participant column
# (p100, p102, p103, p104, p105) overlaid as jittered blue points. Every
# participant column needs its own geom_point() layer in this form.
ggplot(dat, aes(x = reorder(condition, 1:8), y = mode)) + # x: condition, levels ordered by row position; y: mode
geom_point(size = 4) + # Large dots for the modes
geom_pointrange(aes(ymin = lower, ymax = upper)) + # Bars from lower to upper (95% HDI, per the author)
geom_point(aes(x = reorder(condition, 1:8), y = obsMean), size = 4, colour = "red") + # Observed means as large red dots
geom_point(aes(x = reorder(condition, 1:8), y = p100), # x: condition, y: column p100 (a participant's mean, per the author)
position = position_jitter(width = .2, height = 0), # Jitter horizontally only (height = 0 keeps y exact)
size = 2, colour = "blue") + # Small blue points
geom_point(aes(x = reorder(condition, 1:8), y = p102), # Same layer again for column p102
position = position_jitter(width = .2, height = 0), # Jitter horizontally only
size = 2, colour = "blue") + # Small blue points
geom_point(aes(x = reorder(condition, 1:8), y = p103), # Same layer again for column p103
position = position_jitter(width = .2, height = 0), # Jitter horizontally only
size = 2, colour = "blue") + # Small blue points
geom_point(aes(x = reorder(condition, 1:8), y = p104), # Same layer again for column p104
position = position_jitter(width = .2, height = 0), # Jitter horizontally only
size = 2, colour = "blue") + # Small blue points
geom_point(aes(x = reorder(condition, 1:8), y = p105), # Same layer again for column p105
position = position_jitter(width = .2, height = 0), # Jitter horizontally only
size = 2, colour = "blue") + # Small blue points
coord_cartesian(ylim = c(.6, 1)) + # Show the y axis from .6 to 1
ylab("Hit Rate") + # y-axis label
theme_bw() + # Black-and-white base theme
theme(axis.title.x = element_blank(), # No x-axis title
panel.background = element_blank(), # No panel background
panel.grid = element_blank(), # No grid lines
panel.border = element_rect(colour = "black", fill = NA, size = 1)) # Black, unfilled panel border
上述代码生成了如下图形。
如您所见,这段代码效率很低。对于每一组想要叠加在条件均值和置信区间之上的数据点,我都必须再写一段几乎相同的代码。完整数据集中共有139个重复测量,这样做尤其麻烦。我想知道是否有更高效的方法,把各个重复测量的数据点叠加到均值和置信区间上。
答案 0 :(得分:4)
正如评论中已经提到的,您应该先把数据重塑为长格式。这可以使用 reshape2、data.table 或 tidyr 来完成:
# Reshape `dat` from wide to long form: the participant columns (p100, p102,
# ...) are stacked into two columns, `variable` (the former column name) and
# `value` (the measurement). All other columns are repeated on every row.
# The three options below are ALTERNATIVES -- run only one of them.

# Option A: reshape2
library(reshape2)
dat2 <- melt(dat, measure.vars = c("p100", "p102", "p103", "p104", "p105"))

# Option B: data.table
library(data.table)
# Note: setDT() converts `dat` to a data.table by reference (in place).
# method 1 (equivalent to the reshape2 method)
dat2 <- melt(setDT(dat),
             measure.vars = c("p100", "p102", "p103", "p104", "p105"))
# method 2: pick the measure columns by regex instead of listing them, which
# scales to any number of participant columns (matches every column whose
# name starts with "p")
dat2 <- melt(setDT(dat), measure.vars = patterns("^p"))

# Option C: tidyr
library(tidyr)
# pivot_longer() replaces the superseded gather(); the regex selects every
# column named "p" followed by digits, so the columns need not be listed.
# Here `variable` is a character column rather than a factor.
dat2 <- pivot_longer(dat, cols = matches("^p[0-9]+$"),
                     names_to = "variable", values_to = "value")
接下来,按正确的顺序设置 condition 的因子水平(factor levels):
# Fix the x-axis order of `condition`. The levels are taken in order of first
# appearance in dat2, which is the row order of the original wide data, so no
# level has to be typed by hand (a missing or mistyped level would silently
# become NA) and the code works unchanged for any number of conditions.
# Assumes dat2 has not been re-sorted since it was reshaped.
dat2$condition <- factor(dat2$condition,
                         levels = unique(as.character(dat2$condition)))
最后,您可以按如下方式简化绘图代码:
# Coefficient plot from the long data `dat2`.
#
# In long form every condition appears once per participant column, so the
# per-condition summaries (mode, lower, upper, obsMean) are repeated on many
# rows. Drawing the summary layers straight from dat2 would overplot each
# dot and bar once per repetition; reduce to one row per condition first.
# as.data.frame() makes the subsetting behave the same for a data.frame,
# data.table or tibble; the factor levels of `condition` are preserved.
summary_dat <- unique(
  as.data.frame(dat2)[, c("condition", "lower", "upper", "mode", "obsMean")]
)

ggplot(summary_dat, aes(x = condition, y = mode)) +
  # Modes as large black dots
  geom_point(size = 4) +
  # Interval bars from lower to upper
  geom_pointrange(aes(ymin = lower, ymax = upper)) +
  # Observed means in red (x is inherited from the main aes())
  geom_point(aes(y = obsMean), size = 4, colour = "red") +
  # All participant values in a single layer, jittered horizontally only
  geom_point(data = dat2, aes(y = value),
             position = position_jitter(width = .2, height = 0),
             size = 2, colour = "blue") +
  # Upper limit slightly above 1 so points at 1.00 are not clipped
  coord_cartesian(ylim = c(.6, 1.01)) +
  ylab("Hit Rate") +
  theme_bw() +
  theme(axis.title.x = element_blank(),
        panel.background = element_blank(),
        panel.grid = element_blank(),
        panel.border = element_rect(colour = "black", fill = NA, size = 1))
得到如下图形:
此外,您还可以考虑按 variable 列(即各个 p 变量)为这些数据点着色:
# Same coefficient plot, with the participant points coloured by `variable`
# (the participant column each value came from) instead of a fixed blue.
#
# The per-condition summaries are repeated on every row of the long data, so
# reduce to one row per condition for the summary layers; otherwise each dot
# and bar is overplotted once per participant column.
summary_dat <- unique(
  as.data.frame(dat2)[, c("condition", "lower", "upper", "mode", "obsMean")]
)

ggplot(summary_dat, aes(x = condition, y = mode)) +
  # Modes as large black dots
  geom_point(size = 4) +
  # Interval bars from lower to upper
  geom_pointrange(aes(ymin = lower, ymax = upper)) +
  # Observed means in red (x is inherited from the main aes())
  geom_point(aes(y = obsMean), size = 4, colour = "red") +
  # Participant values: colour is mapped to `variable`, which adds a legend
  geom_point(data = dat2, aes(y = value, colour = variable),
             position = position_jitter(width = .2, height = 0), size = 2) +
  # Upper limit slightly above 1 so points at 1.00 are not clipped
  coord_cartesian(ylim = c(.6, 1.01)) +
  ylab("Hit Rate") +
  theme_bw() +
  theme(axis.title.x = element_blank(),
        panel.background = element_blank(),
        panel.grid = element_blank(),
        panel.border = element_rect(colour = "black", fill = NA, size = 1))
结果如下: