我正在尝试对事后(post hoc)Tukey检验得到的临界值进行细粒度的可视化。网上已经有一些不错的指南(good guidelines)介绍如何可视化成对比较,但我需要更精确的东西。我的想法是得到一张图,其中每个小方块代表下面矩阵中的一个临界值,编码方式如下:
数据矩阵为here。
或者,您是否有更好的方法来可视化这些临界值?
编辑:根据@Aaron和@DWin的评论,我想为上述数据补充一些背景信息,并说明我提出这个问题的理由。我正在研究七个虚拟角色可接受性的平均评分,每个虚拟角色以5个不同的动作水平进行动画制作。因此,我有两个因素——角色(7个水平)和动作(5个水平)。由于我发现这两个因素之间存在交互作用,所以我决定比较所有角色在所有动作水平下的均值差异,这就产生了上面这个庞大的矩阵,即事后Tukey检验的输出。这些细节可能有点多了,但请不要把我赶到Cross Validated去,那里的人会把我生吞活剥的......答案 0 :(得分:5)
使用 image 函数:
# Read the post-hoc matrix and draw it with image(), using three colour
# bands delimited by the data minimum, -5.45, 5.45 and the data maximum
# (grey, white and black, in that order).
d <- as.matrix(read.table("http://dl.dropbox.com/u/2505196/postH.dat"))
cell_breaks <- c(min(d), -5.45, 5.45, max(d))
cell_colours <- c("grey", "white", "black")
image(x = 1:35, y = 1:35, z = d, breaks = cell_breaks, col = cell_colours)
如果只想显示一半,可以使用 d[upper.tri(d)] <- NA 将另一半设置为缺失值,并在 min 和 max 函数中添加 na.rm=TRUE。
答案 1 :(得分:4)
这是一个ggplot2解决方案。我确信有更简单的方法可以实现这一目标——我想我有点做过头了!
library(ggplot2)

# Draw the matrix of pairwise post-hoc statistics as a grid of coloured
# squares (one square per matrix cell) and save the result to a PDF file.

# Load data. The matrix is expected to be square (items x items).
postH <- read.table("~/Downloads/postH.dat")
n_items <- ncol(postH)
stopifnot(nrow(postH) == n_items)
item_names <- paste0("item", seq_len(n_items))

# Convert to long form: one row per matrix cell, stacked column by column.
# Done with base R because melt() comes from a reshape package that this
# script never loads. Factor levels are given explicitly so that the items
# keep their numeric order (item1, item2, ..., not item1, item10, ...).
values <- as.matrix(postH)
data_long <- data.frame(
  item_id_x = factor(item_names[as.vector(row(values))], levels = item_names),
  item_id_y = factor(item_names[as.vector(col(values))], levels = item_names),
  value = as.vector(values)
)

# Create critical value labels in a new column.
# NOTE(review): the lower cut-off used to be -5.65; it is made symmetric with
# the upper cut-off here, which assumes the asymmetry was a typo - confirm.
critical_value <- 5.45
data_long$critical_level <- factor(
  ifelse(data_long$value >= critical_value, "high",
         ifelse(data_long$value <= -critical_value, "low", "middle")),
  levels = c("high", "middle", "low")
)

# Named vector for ggplot's scale_fill_manual.
critical_level_colors <- c(high = "black", middle = "grey80", low = "white")

# Grid line positions: every cell boundary from 0.5 up to n + 0.5, so the
# last row and column also get a closing line.
x_grid_lines <- seq(0.5, nlevels(data_long$item_id_x) + 0.5, by = 1)
y_grid_lines <- seq(0.5, nlevels(data_long$item_id_y) + 0.5, by = 1)

# Create plot. Each cell is a unit square centred on the integer code of its
# item. `size` is kept for the grid lines so older ggplot2 releases still
# honour it; ggplot2 >= 3.4.0 prefers `linewidth` and warns about `size`.
plot_1 <- ggplot(data_long, aes(xmin = as.integer(item_id_x) - 0.5,
                                xmax = as.integer(item_id_x) + 0.5,
                                ymin = as.integer(item_id_y) - 0.5,
                                ymax = as.integer(item_id_y) + 0.5,
                                fill = critical_level)) +
  geom_rect() +
  geom_hline(yintercept = y_grid_lines, colour = "grey40", size = 0.15) +
  geom_vline(xintercept = x_grid_lines, colour = "grey40", size = 0.15) +
  scale_x_continuous(breaks = seq_len(nlevels(data_long$item_id_x)),
                     labels = levels(data_long$item_id_x)) +
  scale_y_continuous(breaks = seq_len(nlevels(data_long$item_id_y)),
                     labels = levels(data_long$item_id_y)) +
  scale_fill_manual(name = "Critical Values", values = critical_level_colors) +
  coord_cartesian(xlim = range(x_grid_lines), ylim = range(y_grid_lines)) +
  labs(title = "Critical Values Matrix") +
  theme_bw() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        axis.text.y = element_text(size = 9),
        axis.text.x = element_text(size = 9, angle = 90))

# Save to pdf file.
pdf("plot_1.pdf", height = 8.5, width = 8.5)
print(plot_1)
dev.off()
答案 2 :(得分:2)
如果使用 findInterval 的结果作为 bg、col 和/或 pch 参数的索引(尽管目前所有的点都是正方形),你会发现代码相当紧凑、易懂。
您需要先以长格式获取数据;这是一种方式:
# Read the matrix and reshape it to long form: as.table() followed by
# as.data.frame() yields one row per cell with columns Var1, Var2 and Freq.
d <- as.matrix(read.table("http://dl.dropbox.com/u/2505196/postH.dat"))
dat <- as.data.frame(as.table(d))
# Var1/Var2 come back as factors; as.numeric() turns them into their
# integer level codes, which serve as plotting coordinates.
dat$Var1 <- as.numeric(dat$Var1)
dat$Var2 <- as.numeric(dat$Var2)
然后代码如下:pch=22 使用可填充的方块,bg 设置方块的填充颜色,col 设置边框颜色,cex=1.5 只是让它们比默认大小稍大一点。
# Scatter plot with one filled square per matrix cell. findInterval()
# returns 0, 1 or 2 depending on where Freq falls relative to the two
# cut-offs; adding 1 turns that into an index into the fill palette.
cutoffs <- c(-5.45, 5.45)
fill_palette <- c("grey", "white", "black")
fill_index <- findInterval(dat$Freq, cutoffs) + 1
plot(dat$Var1, dat$Var2,
     pch = 22, cex = 1.5, col = "white",
     bg = fill_palette[fill_index])
之所以需要 1+,是因为 findInterval 返回的值是 0、1、2,而索引需要从 1 开始。
答案 3 :(得分:1)
为了给这个问题做个了结,我采用了@DWin和@Aaron的大部分建议,绘制了下面这张图。最浅的灰色代表不显著的值。我还使用 rect 在轴标签上方画出线条,以便更好地区分各个条件:
# Plot the lower triangle of the post-hoc matrix as coloured points, with
# condition labels (A-E) and character numbers (1-7) on the bottom and
# right-hand axes.
d <- as.matrix(read.table("http://dl.dropbox.com/u/2505196/postH.dat"))
# Remove the upper half of the values (they mirror the lower half).
d[upper.tri(d)] <- NA
dat <- within(as.data.frame(as.table(d)), {
  Var1 <- as.numeric(Var1)
  Var2 <- as.numeric(Var2)
})
par(mar = c(6, 3, 3, 6))
colPh <- c("gray50", "gray90", "black")
# findInterval() gives 0/1/2 relative to the two cut-offs; +1 makes it a
# valid index into colPh. Cells set to NA above get an NA fill colour.
plot(dat$Var1, dat$Var2,
     bg = colPh[1 + findInterval(dat$Freq, c(-5.45, 5.45))],
     col = "white", cex = 1.2, pch = 21, axes = FALSE, xlab = "", ylab = "")
labDis <- rep(c("A", "B", "C", "D", "E"), times = 7)
labChar <- 1:7
# First and last position of each block of five conditions (1-5, 6-10, ...).
group_start <- seq(1, 31, by = 5)
group_end <- group_start + 4
axis(1, at = 1:35, labels = labDis, cex.axis = 0.5, tick = FALSE, line = -1.4)
axis(1, at = seq(3, 33, 5), labels = labChar, tick = FALSE)
# Drawing lines above the axis for better identification: zero-height
# rectangles at y = 0, one per block (rect() is vectorised).
rect(group_start, 0, group_end, 0)
axis(4, at = 1:35, labels = labDis, cex.axis = 0.5, tick = FALSE, line = -1.4)
axis(4, at = seq(3, 33, 5), labels = labChar, tick = FALSE)
# Same markers for the right-hand axis: zero-width rectangles at x = 36.
rect(36, group_start, 36, group_end)
legend("topleft", legend = c("not significant", "p<0.01", "p<0.05"), pch = 16,
       col = c("gray90", "gray50", "black"), cex = 0.7, bty = "n")