如何让 R 中的 geom_point 按变量取值成比例地显示点的大小?

时间:2018-10-20 15:40:00

标签: r ggplot2

我的数据(见下文,抱歉有点大)名为 test.dput,绘图代码也一并附上。问题是:图中点的大小并没有按 read.counts_value 变量的取值成比例变化。我该如何解决这个问题?谢谢!

我的代码:

# Scatter plot of the contamination estimate at each dilution step, with one
# panel per contamination series and point size mapped to the read count.
p <- ggplot(
  data = test.dput,
  mapping = aes(
    x = diluted_sample,
    y = contamination_value,
    colour = contamination_variable,
    size = read.counts_value
  )
) +
  facet_grid(. ~ contamination_variable) +
  geom_point() +
  scale_colour_manual(values = c("red", "yellow", "black", "blue")) +
  # Area-based size scale: a read count of 0 is drawn with size 0.
  scale_size_area() +
  # Only every fourth dilution step gets an x-axis label.
  scale_x_discrete(breaks = c("100%", "80%", "60%", "40%", "20%", "0%"))

数据:

    # Example data for the question: 168 rows = 21 dilution steps x 4
    # contamination series x 2 read-count sources. The table is assembled inside
    # local() so that only `test.dput` is created in the workspace.
    test.dput <- local({
      steps_per_series <- 21L

      # Dilution labels "100%", "95%", ..., "0%"; also the factor level order.
      dilution_labels <- paste0(seq(100, 0, by = -5), "%")

      series_names <- c(
        "Test1_contamination_of_NA12878",
        "Test2_contamination_of_NA12878",
        "Test1_contamination_of_NA12877",
        "Test2_contamination_of_NA12877"
      )

      # One value per (series, dilution step), listed series by series in the
      # order of `series_names`. The same 84 values occur for both read-count
      # sources.
      contamination_block <- c(
        # Test1_contamination_of_NA12878
        99.6, 99.1, 96.8, 92.6, 88, 82.6, 77.7, 73, 67.9, 63.1, 58.2,
        53.2, 48.1, 43.1, 37.6, 32.2, 26.3, 20.2, 13.8, 7.1, 0.1,
        # Test2_contamination_of_NA12878
        100, 94.15, 88.72, 83.36, 78.2, 73.08, 68.12, 63.3, 58.52, 53.86,
        49.09, 44.61, 40.12, 35.39, 30.74, 25.82, 20.9, 15.8, 10.59, 5.14,
        0.08,
        # Test1_contamination_of_NA12877
        0.1, 7, 13.6, 20.1, 26.2, 32.1, 37.7, 43.2, 48.3, 53.2, 58.2,
        63.1, 68, 72.8, 77.8, 82.8, 87.8, 92.5, 96.8, 99.1, 99.6,
        # Test2_contamination_of_NA12877
        0.1, 5.21, 10.5, 15.85, 20.92, 26.04, 30.93, 35.76, 40.41, 45.12,
        49.82, 54.5, 59.14, 64, 68.86, 73.58, 78.49, 83.5, 88.82, 94.26,
        100
      )

      # Level names are reproduced exactly as in the original data, including
      # the "." in the first label.
      read_sources <- c("No_of.reads_from_NA12878", "No_of_reads_from_NA12877")

      # Read counts from 15,000,000 down to 0 in steps of 750,000.
      reads_descending <- 750000 * (20:0)

      data.frame(
        diluted_sample = factor(
          rep(dilution_labels, times = 8L),
          levels = dilution_labels
        ),
        contamination_variable = rep(
          rep(series_names, each = steps_per_series),
          times = 2L
        ),
        contamination_value = rep(contamination_block, times = 2L),
        read.counts_variable = factor(
          rep(read_sources, each = 84L),
          levels = read_sources
        ),
        # The first source counts down within each series, the second counts up.
        read.counts_value = c(
          rep(reads_descending, times = 4L),
          rep(rev(reads_descending), times = 4L)
        ),
        stringsAsFactors = FALSE
      )
    })

2 个答案:

答案 0 :(得分:4)

您只按 contamination_variable 对数据分面,但该变量的每个取值都同时出现在 read.counts_variable 的两个类别中:两组点的 x、y 坐标完全相同,大小却相反,因此大小不同的点重叠在一起,看起来好像大小没有变化。您可以在 facet_grid 中再加入一个分类变量来区分这些数据:

# Replacement for the facet_grid() call in the plot: rows are split by
# read.counts_variable and columns by contamination_variable, so the two
# read-count sources are drawn in separate panels.
facet_grid(read.counts_variable ~ contamination_variable)

enter image description here

答案 1 :(得分:1)

如果给点设置透明度(alpha),就可以看到点的大小确实在变化,只是大小不同的点相互重叠了:

# Same plot as in the question, but with semi-transparent points so that
# points drawn at the same position remain visible through each other.
p <- ggplot(
  data = test.dput,
  mapping = aes(
    x = diluted_sample,
    y = contamination_value,
    colour = contamination_variable,
    size = read.counts_value
  )
) +
  facet_grid(. ~ contamination_variable) +
  geom_point(alpha = 0.2) +
  scale_colour_manual(values = c("red", "darkgreen", "black", "blue")) +
  # Area-based size scale: a read count of 0 is drawn with size 0.
  scale_size_area() +
  # Only every fourth dilution step gets an x-axis label.
  scale_x_discrete(breaks = c("100%", "80%", "60%", "40%", "20%", "0%")) +
  # Rotate the x-axis labels so they do not collide in the narrow panels.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
p

enter image description here