带有离散boxplot数据的geom_path

时间:2017-07-01 16:38:33

标签: r ggplot2

我已经用尽了自己能想到的办法,也查遍了能找到的相关链接,仍然无法解决这个问题,所以需要一些帮助!

我正在尝试使用cumSeg包向ggplot图表中添加阶梯函数(step function)。我在this previous question中已经成功做到过这一点,所以对相关函数的用法比较熟悉。

我在那个帖子中创建的图非常简单:只是一个x对y的条形图,用条形表示x的平均值,之后我自己添加了误差线(因此数据是一个16 x 2的数据框)。

我想重新绘制这张图,但用依次排列的箱线图代替条形图。这次我使用的是原始数据,即分布在16个因子水平上的约250个观测值(因子与之前相同)。

现在,当我尝试添加geom_line/geom_path/geom_step时,ggplot报错说数据的维度不匹配:虽然仍然只有16个因子/箱线图,但观测值已不再是16个(Error: Aesthetics must be either length 1 or the same as the data (249): x, y, colour, group, fill)。

为了计算阶梯函数,我传入的是16个因子各自的平均值,因此它返回的是一个含16个元素的向量,而不是约250个(这是显然的)。

如何将阶梯函数添加到箱线图中,并让ggplot明白它应该对应16个因子水平?我无法判断这是数据框本身的问题,还是我把数据传给ggplot的方式有问题。

我尝试把它放在第二个数据框中,并以geom_path(data=df2)的形式传入,而不是像this question那样继承主图的数据,但它仍然报错(Error: Aesthetics must be either length 1 or the same as the data (16): x, y, colour, group)。(下面的代码仍然是这种形式。)

# Reshape the transposed input table with melt().
# NOTE(review): this call is missing its closing parenthesis, and the result is
# assigned to `data.melt` while every later line reads `melted_data` -- confirm
# which name is intended before running.
data.melt <- melt(t(infile)

# Reference values stored as fractions; they are multiplied by 100 wherever
# they are plotted. Presumably mean GC content and its standard deviation for
# the operon and for the genome -- TODO confirm against the data source.
operon_gc <- 0.408891366
opgc_stdev <- 0.015712091
genome_gc <- 0.425031611
gengc_stdev <- 0.007587437


# Average `value` within each level of `Var1` and pass those means to
# jumpoints() as y (see the cumSeg documentation for `k` and `output`).
stepfunc <- jumpoints(y=aggregate(melted_data$value~melted_data$Var1, simplify=TRUE, FUN="mean")$`melted_data$value`, k=1, output="1")
# One row per position 1..16 with the fitted step value at that position.
# y is left on the 0-1 scale here (not multiplied by 100).
func_data <- data.frame(x = c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16), y = stepfunc$fitted.values)

# Make boxplot
# Base plot: one box per level of Var1, y shown as value * 100, boxes filled
# by Var1.

bp <- ggplot(melted_data, aes(x=Var1, y=value*100, fill=Var1)) + theme_bw()

#bp <- bp + scale_x_discrete(name = "Locus") + scale_y_continuous(name="GC Content (%)")
# Shaded band covering operon_gc +/- opgc_stdev (as a percentage); x runs from
# 0 to 17 so the band extends past the 16 discrete positions on both sides.
bp <- bp + geom_rect(xmin=0, xmax=17,
                                 ymin=(operon_gc-opgc_stdev)*100,
                                 ymax=(operon_gc+opgc_stdev)*100,
                                 fill = "grey79", alpha=0.05)
# Same kind of band for genome_gc +/- gengc_stdev.
bp <- bp + geom_rect(xmin=0, xmax=17,
                                 ymin=(genome_gc-gengc_stdev)*100,
                                 ymax=(genome_gc+gengc_stdev)*100,
                                 fill = "beige", alpha=.08)
# Horizontal reference lines (slope 0) at the genome and operon values.
bp <- bp + geom_abline(intercept=genome_gc*100, slope=0,
                                   colour="gray14", linetype=3)
bp <- bp + geom_abline(intercept=operon_gc*100, slope=0,
                                   colour="gray14", linetype=3)

# Boxplots are drawn on top of the bands and reference lines.
bp <- bp + geom_boxplot(alpha = 0.7)

# Colour used for the "GC Step Fit" legend entry of the path layer below.
# NOTE(review): guides(color="Regression") is passed as an extra argument to
# scale_color_manual() rather than added to the plot -- looks unintended.
bp <- bp + scale_color_manual(values = c("GC Step Fit"="red"), guides(color="Regression"))

# Step-function overlay.
# NOTE(review): the aesthetics point at func_data$x / func_data$y (16 values)
# while no `data` argument is given to this layer; this is the layer behind the
# "Aesthetics must be either length 1 or the same as the data" error quoted in
# the question. y is also not multiplied by 100 here, unlike the boxplot's y.
bp <- bp + geom_path(linetype=4, size=0.9, aes(x=func_data$x,
                                               y=func_data$y,
                                               color="GC Step Fit",
                                               group=1))
# Legend below the plot in a single horizontal row; x labels rotated 45 degrees.
bp <- bp + theme(legend.position="bottom",
                 legend.direction="horizontal",
                 axis.text.x = element_text(angle=45, hjust=1)) + guides(fill=guide_legend(title="", nrow = 1))

# Print the assembled plot.
bp

数据

> dput(func_data)
structure(list(x = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 
14, 15, 16), y = c(0.452456815737206, 0.452456815737206, 0.452456815737206, 
0.452456815737206, 0.452456815737206, 0.452456815737206, 0.452456815737206, 
0.452456815737206, 0.452456815737206, 0.452456815737206, 0.452456815737206, 
0.375047391939972, 0.375047391939972, 0.375047391939972, 0.375047391939972, 
0.375047391939972)), .Names = c("x", "y"), row.names = c(NA, 
-16L), class = "data.frame")



> dput(melted_data)
structure(list(Var1 = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L, 4L, 5L, 
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L, 
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 
16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 
14L, 15L, 16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 
12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 
10L, 11L, 12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L, 4L, 5L, 
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L, 
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 
16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 
14L, 15L, 16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 
12L, 13L, 14L, 15L, 16L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 
11L, 12L, 14L, 15L, 16L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 
11L, 12L, 15L, 16L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 
12L, 15L, 16L, 11L), .Label = c("PVC1", "PVC2", "PVC3", "PVC4", 
"PVC5", "PVC6", "PVC7", "PVC8", "PVC9", "PVC10", "PVC11", "PVC12", 
"PVC13", "PVC14", "PVC15", "PVC16"), class = "factor"), Var2 = c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 15L, 
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 17L
), value = c(0.404444444, 0.436329588, 0.46031746, 0.479318735, 
0.466230937, 0.480874317, 0.476811594, 0.441558442, 0.449172577, 
0.476525822, 0.452674897, 0.460918332, 0.368041912, 0.339160839, 
0.415355269, 0.408163265, 0.401826484, 0.45411985, 0.468609865, 
0.479735318, 0.464052288, 0.469945355, 0.476811594, 0.444032158, 
0.453900709, 0.494004796, 0.467315716, 0.457805907, 0.387071651, 
0.390737117, 0.408679065, 0.425170068, 0.355555556, 0.438069217, 
0.423076923, 0.466666667, 0.450980392, 0.422222222, 0.469298246, 
0.43196005, 0.416666667, 0.496402878, 0.428676201, 0.382113821, 
0.349765258, 0.332280147, 0.373371925, 0.346448087, 0.415555556, 
0.440508629, 0.435222672, 0.455833333, 0.446623094, 0.422222222, 
0.463450292, 0.43258427, 0.425675676, 0.497584541, 0.422524565, 
0.392592593, 0.362779741, 0.337552743, 0.379856115, 0.348888889, 
0.391111111, 0.421004566, 0.426439232, 0.480367586, 0.472766885, 
0.455555556, 0.495726496, 0.447565543, 0.424460432, 0.48441247, 
0.435164835, 0.39600551, 0.3858393, 0.323655914, 0.383693046, 
0.329988852, 0.395555556, 0.452380952, 0.454756381, 0.448129252, 
0.496732026, 0.423728814, 0.502923977, 0.433832709, 0.41607565, 
0.498800959, 0.399161736, 0.368421053, 0.386568387, 0.369901547, 
0.398550725, 0.34006734, 0.406392694, 0.455840456, 0.458598726, 
0.43792517, 0.501089325, 0.427777778, 0.49122807, 0.435081149, 
0.416020672, 0.48441247, 0.40617284, 0.379298942, 0.402298851, 
0.361462729, 0.396135266, 0.356666667, 0.353333333, 0.439182916, 
0.469316597, 0.461868038, 0.490196078, 0.405555556, 0.505847953, 
0.430529595, 0.406619385, 0.470023981, 0.395262768, 0.355072464, 
0.373677249, 0.348008386, 0.382804995, 0.355481728, 0.415555556, 
0.481481481, 0.4550036, 0.485074627, 0.501089325, 0.5, 0.51754386, 
0.465043695, 0.438478747, 0.501199041, 0.457733481, 0.416815742, 
0.360672976, 0.388285024, 0.397509579, 0.356589147, 0.384444444, 
0.482917821, 0.452525253, 0.487864078, 0.501089325, 0.488888889, 
0.513157895, 0.47627965, 0.475609756, 0.513189448, 0.471391657, 
0.419797257, 0.38467433, 0.376081425, 0.396666667, 0.370985604, 
0.42, 0.477777778, 0.436063218, 0.476782753, 0.490196078, 0.466666667, 
0.51754386, 0.45505618, 0.44295302, 0.532374101, 0.460707635, 
0.426019548, 0.35755814, 0.389842632, 0.388489209, 0.358730159, 
0.422222222, 0.459610028, 0.473304473, 0.502487562, 0.509803922, 
0.438888889, 0.516081871, 0.480024969, 0.457317073, 0.527577938, 
0.460969293, 0.424148607, 0.386850153, 0.369161868, 0.397677794, 
0.357696567, 0.433333333, 0.450704225, 0.429118774, 0.497031383, 
0.505446623, 0.455555556, 0.492690058, 0.444444444, 0.409722222, 
0.501199041, 0.444812362, 0.414860681, 0.361111111, 0.390096618, 
0.394724221, 0.358803987, 0.426666667, 0.471837488, 0.495748299, 
0.511982571, 0.45, 0.513157895, 0.465043695, 0.438478747, 0.498800959, 
0.453200148, 0.409375, 0.329166667, 0.384172662, 0.38961039, 
0.413333333, 0.406113537, 0.450728363, 0.435244161, 0.431693989, 
0.441520468, 0.427745665, 0.378076063, 0.389671362, 0.427222222, 
0.397905759, 0.423295455, 0.375268817, 0.391111111, 0.39893617, 
0.461538462, 0.437367304, 0.448087432, 0.454678363, 0.421323057, 
0.384787472, 0.394366197, 0.419141914, 0.401331931, 0.423768939, 
0.368817204, 0.42680776)), .Names = c("Var1", "Var2", "value"
), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 
25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 
38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 
51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 
64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 
77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 
90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 
102L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L, 
113L, 114L, 115L, 116L, 117L, 118L, 119L, 120L, 121L, 122L, 123L, 
124L, 125L, 126L, 127L, 128L, 129L, 130L, 131L, 132L, 133L, 134L, 
135L, 136L, 137L, 138L, 139L, 140L, 141L, 142L, 143L, 144L, 145L, 
146L, 147L, 148L, 149L, 150L, 151L, 152L, 153L, 154L, 155L, 156L, 
157L, 158L, 159L, 160L, 161L, 162L, 163L, 164L, 165L, 166L, 167L, 
168L, 169L, 170L, 171L, 172L, 173L, 174L, 175L, 176L, 177L, 178L, 
179L, 180L, 181L, 182L, 183L, 184L, 185L, 186L, 187L, 188L, 189L, 
190L, 191L, 192L, 193L, 194L, 195L, 196L, 197L, 198L, 199L, 200L, 
201L, 202L, 203L, 204L, 205L, 206L, 207L, 208L, 209L, 210L, 212L, 
213L, 214L, 215L, 216L, 217L, 218L, 219L, 220L, 222L, 223L, 224L, 
225L, 226L, 228L, 229L, 230L, 231L, 232L, 233L, 234L, 235L, 236L, 
239L, 240L, 241L, 242L, 244L, 245L, 246L, 247L, 248L, 249L, 250L, 
251L, 252L, 255L, 256L, 267L), class = "data.frame")

2 个答案:

答案 0 :(得分:1)

我不确定自己最终是如何解决这个问题的。我只能假设之前犯了一个非常愚蠢的错误,但下面的代码终于得到了预期的结果:

# Boxplot of value * 100 per level of Var1 with the fitted step function drawn
# on top. Layers are listed in drawing order: shaded bands, horizontal
# reference lines, boxplots, and finally the path read from func_data.
bp_gc <- ggplot(melted_data, aes(x = Var1, y = value * 100)) +
  theme_bw() +
  # Band covering operon_gc +/- opgc_stdev, expressed as a percentage.
  geom_rect(
    xmin = 0, xmax = 17,
    ymin = (operon_gc - opgc_stdev) * 100,
    ymax = (operon_gc + opgc_stdev) * 100,
    fill = "grey79", alpha = 0.05
  ) +
  # Band covering genome_gc +/- gengc_stdev, expressed as a percentage.
  geom_rect(
    xmin = 0, xmax = 17,
    ymin = (genome_gc - gengc_stdev) * 100,
    ymax = (genome_gc + gengc_stdev) * 100,
    fill = "beige", alpha = 0.08
  ) +
  # Horizontal lines (slope 0) at the genome and operon values.
  geom_abline(
    intercept = genome_gc * 100, slope = 0,
    colour = "gray14", linetype = 3
  ) +
  geom_abline(
    intercept = operon_gc * 100, slope = 0,
    colour = "gray14", linetype = 3
  ) +
  geom_boxplot(alpha = 0.7, fill = "dodgerblue", color = "gray11") +
  ylab("GC Content (%)") +
  xlab("Locus") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  coord_cartesian(ylim = c(30, 60)) +
  # The path layer gets its own data frame, so its aesthetics are evaluated
  # against func_data's 16 rows; y is rescaled to match the boxplot's y.
  geom_path(
    data = func_data, linetype = 4, size = 0.9,
    aes(x = x, y = y * 100)
  )

# Print the assembled plot.
bp_gc

enter image description here

答案 1 :(得分:0)

我并不是100%明白你想要实现的目标。是这样的吗?

# Plain boxplot of `value` for each level of `Var1`, drawn from the question's
# long-format data frame `melted_data`.
ggplot(melted_data, aes(Var1, value)) +
  geom_boxplot()

enter image description here

# Boxplot with a path through the per-group medians, computed by ggplot from
# the question's data frame `melted_data` rather than from a separate table.
# aes(group = 1) puts every summary point in the same group so that the path
# is joined across the levels of Var1.
ggplot(melted_data, aes(Var1, value)) +
  stat_summary(fun.y = median, geom = "path", aes(group = 1)) +
  geom_boxplot()

enter image description here

如果您真的想在主数据框架之外计算统计数据,通常最好这样做:

# Template: plot the raw points from df1, then overlay a path whose
# coordinates come from a separately computed summary data frame, supplied to
# that layer through its own `data` argument.
ggplot(data = df1, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_path(data = summarydf, mapping = aes(x = xmean, y = ymean))