如何使图上点的大小与p值成比例?

时间:2016-07-11 15:00:44

标签: r ggplot2

我有p值、x值和y值三个向量,它们要么是直接给定的,要么是对距离矩阵使用cmdscale命令生成的。直接用plot(x,y)绘制坐标可以正常工作,但我希望点的大小与其p值相关联(p值越小,点越大)。我想不出该怎么做,希望得到一些建议。我曾考虑把p值标准化后再乘以某个系数进行缩放——plot(..., cex=2*normalized)——但这行不通。下面我贴出了一些可用的示例值,以便重现问题。

> dput(pValues)
c(4.48e-14, 1.66e-12, 2.53e-08, 8.57e-08, 3.4e-07, 5.68e-07, 
9.92e-07, 1.08e-06, 2.82e-06, 1.81e-05, 0.000133, 0.00053, 0.000616, 
0.000846, 0.000947, 0.001110537, 0.001110537, 0.001505779, 0.001573054, 
0.001573054, 0.002112306, 0.002308863, 0.003121497, 0.003121497, 
0.003121497, 0.003121497, 0.003121497, 0.003121497, 0.003121497, 
0.003121497, 0.003177736, 0.004723347, 0.005004768, 0.005301549, 
1.86e-17, 9.18e-17, 2.16e-16, 8.23e-16, 9.2e-16, 1.28e-15, 1.38e-15, 
2.59e-15, 6.43e-15, 6.43e-15, 8.42e-15, 1.21e-14, 1.02e-13, 7.58e-13, 
1.53e-12, 1.96e-11)


> dput(x)
c(-0.546606289027691, -0.513646680083475, 0.157100976250898, 
0.109447441578375, 0.109447441578375, 0.104451507558839, 0.104451507558839, 
0.109447441578375, 0.175507893375115, -0.14664445744836, 0.0543475836486623, 
0.0557408040609083, 0.0893466913878634, 0.0893466913878634, 0.142438485025367, 
0.0470980043880961, -0.0221917747418056, 0.109447441578375, 0.0362416205348296, 
0.0470980043880961, 0.0362416205348296, 0.0347865097394601, 0.0391497309324339, 
0.0413674642703439, 0.0667384023198892, 0.0461182424640277, 0.0413674642703439, 
0.0667384023198892, 0.0461182424640277, 0.0475891023261346, 0.0893466913878634, 
0.0764742527259463, 0.0422421029990655, -0.0221917747418056, 
-0.510082195428624, -0.510082195428624, -0.510082195428624, -0.510082195428624, 
0.53984552027647, 0.457352428403424, -0.510082195428624, -0.510082195428624, 
0.476216399097293, 0.476216399097293, -0.510082195428624, 0.297997535161347, 
-0.510082195428624, 0.397117197655551, 0.440730282360781, 0.0312250127868402)


> dput(y)
c(0.107461316099316, 0.156755909792581, -0.166842986685387, 
-0.141978234324384, -0.141978234324384, -0.0687959347159215, 
-0.0687959347159215, -0.141978234324384, -0.142554658469002, 
-0.0395153544691704, -0.0576565915449701, -0.0936541502757846, 
-0.0438034590304964, -0.0438034590304964, -0.190330058396921, 
-0.0329359077881266, -0.0116066646384657, -0.141978234324384, 
-0.0714188307783769, -0.0329359077881266, -0.0714188307783769, 
-0.054867626805721, -0.0112558858117774, -0.0166800568953671, 
-0.0274480805166001, -0.0331407851151761, -0.0166800568953671, 
-0.0274480805166001, -0.0331407851151761, -0.00455654056913195, 
-0.0438034590304963, -0.0148236474766705, -0.130181815402346, 
-0.0116066646384657, 0.0838569446695995, 0.0838569446695995, 
0.0838569446695995, 0.0838569446695995, 0.0372937912551249, 0.555328846358372, 
0.0838569446695995, 0.0838569446695995, 0.521415820920117, 0.521415820920117, 
0.0838569446695994, -0.506985517718071, 0.0838569446695995, -0.324019743520653, 
0.421305271998988, -0.0312119222707089)

2 个答案:

答案 0 :(得分:4)

您可以将它们合并成一个data.frame,然后用ggplot绘图:

# Collect the coordinates and p-values into a single data frame for ggplot2.
library(ggplot2)
df <- data.frame(x, y, pValues)

# Point size follows -log(p): the smaller the p-value, the larger the point.
ggplot(df, aes(x = x, y = y, size = -log(pValues))) +
  geom_point(colour = "blue", alpha = 0.5)

我建议直接绘制p值的负对数,这样图形才符合直觉(点越大,越显著)。

enter image description here

这是快捷的做法。如果要自定义绘图并改进图例,可以直接在scale_size的trans参数中指定对数变换。你还可以调整range(圆点的大小范围)、图例中使用的breaks(刻度值;注意要用原始单位给出),甚至图例标题。

# Map the raw p-values to size and let the size scale apply the log10
# transform, so the legend breaks can be given in original p-value units.
# range is c(15, 1) (largest first) so the smallest p-values get the
# largest points.
ggplot(df, aes(x = x, y = y, size = pValues)) +
  geom_point(colour = "blue", alpha = 0.5) +
  scale_size(
    name = "p-values",
    trans = "log10",
    range = c(15, 1),
    breaks = c(1e-17, 1e-15, 1e-10, 1e-5, 1e-3)
  )

enter image description here

请注意,我必须把range两个端点的顺序反过来(即c(15, 1)),因为这里的变换函数中没有负号。

答案 1 :(得分:2)

我认为log10也是最好的;)

# --------- Installing packages (run once if they are missing)
# install.packages("ggplot2", dependencies = TRUE)
# install.packages("gridExtra", dependencies = TRUE)

# --------- Loading libraries
library("ggplot2")
library("gridExtra")

# Compare three ways of mapping p-values to point size, side by side.
# dOut_x, dOut_y and d_pvalue are not defined in this snippet; presumably they
# hold the question's x, y and pValues vectors -- confirm before running.
df <- data.frame(dOut_x, dOut_y, d_pvalue)

# Graph 1 (TomNash proposal): size by -log(p), fixed blue colour.
# Title alignment is a theme setting, so it goes through theme(); passed to
# labs() it would only be stored as an unused label.
grO <- ggplot(data = df) +
  aes(x = dOut_x, y = dOut_y, size = -log(d_pvalue)) +
  geom_point(alpha = 0.5, col = "blue") +
  labs(title = "*-log: basic*") +
  theme(plot.title = element_text(hjust = 0))

# Graph 2: size by -log(p), with a rainbow colour gradient.
# NOTE(review): the colour variable was `dpuout_y`, which is neither a column
# of df nor defined in this snippet; dOut_y is assumed to be what was meant.
grTw <- ggplot(
  data = df,
  aes(x = dOut_x, y = dOut_y, size = -log(d_pvalue), color = dOut_y)
) +
  geom_point(alpha = 0.25) +
  scale_colour_gradientn(colours = rainbow(4)) +
  labs(title = "*-log*") +
  theme(plot.title = element_text(hjust = 0))

# Graph 3: size by log10(p), same colour gradient.
# NOTE(review): without a minus sign, smaller p-values are drawn as smaller
# points here, the opposite of graphs 1 and 2.
grTh <- ggplot(
  data = df,
  aes(x = dOut_x, y = dOut_y, size = log10(d_pvalue), color = dOut_y)
) +
  geom_point(alpha = 0.25) +
  scale_colour_gradientn(colours = rainbow(4)) +
  labs(title = "*log10*") +
  theme(plot.title = element_text(hjust = 0))

# Draw all three panels into one PNG. The device is opened only once the
# plots are built, and is closed even if rendering fails, so no graphics
# device is left open.
png("ggplot2sizing.png", height = 400, width = 850)
tryCatch(
  grid.arrange(grO, grTw, grTh, ncol = 3, top = "Stack*R: ggplot2-Sizing"),
  finally = dev.off()
)

enter image description here

希望能有所帮助。 祝你好运。