绘制ggplot中“已知”点的模拟覆盖率

时间:2015-05-08 14:01:24

标签: r ggplot2 melt

我有一组模拟结果:模拟过程包括删除部分数据、重新拟合模型,并生成5个beta系数(AAA:EEE)的平均值和CI。下面的样本数据由dput()导出,可直接重现。

# Simulation summary used by the plots below: 15 rows, one per combination of
# PercentData and Beta. PercentData is a factor with eight levels
# ("90Percent" ... "20Percent"), of which only the first three occur here.
# Beta holds the coefficient names "AAA".."EEE"; Mean, UpperCI and LowerCI
# are numeric columns.
data <- structure(list(PercentData = structure(c(1L, 1L, 1L, 1L, 1L,  2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), .Label = c("90Percent",  "80Percent", "70Percent", "60Percent", "50Percent", "40Percent",  "30Percent", "20Percent"), class = "factor"), Beta = c("AAA",  "BBB", "CCC", "DDD", "EEE", "AAA", "BBB", "CCC", "DDD", "EEE",  "AAA", "BBB", "CCC", "DDD", "EEE"), Mean = c(-0.0184798128725727,  0.577389832570274, 0.307079889066798, -1.04434737355186, 0.765444299971639,  -0.0342811658086197, 0.571119844203796, 0.307904693724208, -1.05833526491829,  0.772586633692223, -0.0287982339992084, 0.567559187110271, 0.300408471488675,  -1.05392763762688, 0.768956684863523), UpperCI = c(0.011382484714714,  0.592146704143253, 0.334772268551607, -0.997865978815953, 0.787196643647358,  0.0270716705899447, 0.595047291677895, 0.363220155550484, -1.01101175408862,  0.82142109640807, 0.0501543137571774, 0.597455743424951, 0.351903162023205,  -1.00408187639287, 0.805740012899328), LowerCI = c(-0.0483421104598594,  0.562632960997295, 0.279387509581988, -1.09082876828776, 0.743691956295919,  -0.0956340022071842, 0.547192396729696, 0.252589231897933, -1.10565877574796,  0.723752170976376, -0.107750781755594, 0.537662630795591, 0.248913780954145,  -1.10377339886088, 0.732173356827717)), .Names = c("PercentData",  "Beta", "Mean", "UpperCI", "LowerCI"), row.names = c("X1", "X2",  "X3", "X4", "X5", "X1.1", "X2.1", "X3.1", "X4.1", "X5.1", "X1.2",  "X2.2", "X3.2", "X4.2", "X5.2"), class = "data.frame")

# Preview of the first six rows (printed output shown as comments).
head(data)
#      PercentData Beta        Mean     UpperCI     LowerCI
# X1     90Percent  AAA -0.01847981  0.01138248 -0.04834211
# X2     90Percent  BBB  0.57738983  0.59214670  0.56263296
# X3     90Percent  CCC  0.30707989  0.33477227  0.27938751
# X4     90Percent  DDD -1.04434737 -0.99786598 -1.09082877
# X5     90Percent  EEE  0.76544430  0.78719664  0.74369196
# X1.1   80Percent  AAA -0.03428117  0.02707167 -0.09563400

我可以使用以下代码绘制模拟数据:

# Plot the simulated mean of every coefficient together with its confidence
# interval, drawing one dodged point and error bar per PercentData level.
#
# library() stops with an error when ggplot2 is not installed; require() would
# only warn and return FALSE, so the failure would surface later in ggplot().
library(ggplot2)

ggplot(data, aes(x = Beta)) +
  geom_point(aes(y = Mean, color = PercentData),
             position = position_dodge(0.5),
             size = 2.5) +
  geom_errorbar(aes(ymin = LowerCI,
                    ymax = UpperCI,
                    color = PercentData),
                # cex is the base-graphics spelling that ggplot2 renames to
                # size; here it sets the thickness of the error-bar lines.
                cex = 1.25,
                width = .75,
                # Same dodge width as the points so bars and points line up.
                position = position_dodge(0.5))

enter image description here

我想在上图中添加“真相”。目前,我将真实数据放在不同的DF中,如下所示。

# Reference ("truth") values kept in a separate data frame: five rows, one per
# Beta ("AAA".."EEE"), with numeric columns Est, UpperCI and LowerCI.
truth <- structure(list(Est = c(-0.0178489366139546, 0.575347417798796,  0.299445933484525, -1.02862600141036, 0.767365594695577), UpperCI = c(0.486793276079609,  0.647987076085212, 0.380433141441644, -0.937511307956846, 0.837682594951183 ), LowerCI = c(-0.522491149307518, 0.502707759512379, 0.218458725527406,  -1.11974069486387, 0.697048594439971), Beta = c("AAA", "BBB",  "CCC", "DDD", "EEE")), .Names = c("Est", "UpperCI", "LowerCI",  "Beta"), row.names = c(NA, 5L), class = "data.frame") 

# Preview of the rows (printed output shown as comments).
head(truth)
#           Est    UpperCI    LowerCI Beta
# 1 -0.01784894  0.4867933 -0.5224911  AAA
# 2  0.57534742  0.6479871  0.5027078  BBB
# 3  0.29944593  0.3804331  0.2184587  CCC
# 4 -1.02862600 -0.9375113 -1.1197407  DDD
# 5  0.76736559  0.8376826  0.6970486  EEE

我想将真实数据作为一条线添加到上图中,并在下面提供了一个示意图,其中添加的黑线表示truth$Est值 - 不过示意图中黑线的位置并不代表实际数值。

如果可能的话,还可以包括真相上限和下限CI。是否可以绘制两条线 - 每个CI值一条线?

enter image description here

我已将真实数据作为单独的DF保留,因为我不确定格式化数据以获得预期结果的最佳方法。我可以根据注释或建议重新格式化,以便将数据放在单个melt()数据框中。

提前致谢。

1 个答案:

答案 0 :(得分:3)

经过一些数据重组之后,使用geom_segment就可以很容易地实现:

# Join the truth values onto every simulation row. UpperCI and LowerCI exist
# in both inputs, so merge() suffixes them: .x = simulation, .y = truth.
all.data <- merge(x = data, y = truth, by = "Beta")

# Numeric code of each Beta category, used to place segments on the x axis.
all.data[["xposition"]] <- as.numeric(factor(all.data[["Beta"]]))

ggplot(data = all.data, mapping = aes(x = Beta)) +
  # Simulated means, dodged by PercentData.
  geom_point(
    mapping = aes(y = Mean, color = PercentData),
    size = 2.5,
    position = position_dodge(0.5)
  ) +
  # Simulated confidence intervals, dodged to match the points.
  geom_errorbar(
    mapping = aes(ymin = LowerCI.x, ymax = UpperCI.x, color = PercentData),
    width = .75,
    cex = 1.25,
    position = position_dodge(0.5)
  ) +
  # Horizontal line at the truth upper CI bound, spanning 0.5 either side of
  # the category position.
  geom_segment(
    mapping = aes(
      x = xposition - 0.5, xend = xposition + 0.5,
      y = UpperCI.y, yend = UpperCI.y
    )
  ) +
  # Horizontal line at the truth lower CI bound, same horizontal extent.
  geom_segment(
    mapping = aes(
      x = xposition - 0.5, xend = xposition + 0.5,
      y = LowerCI.y, yend = LowerCI.y
    )
  )

enter image description here

有几点需要注意:

  • 使用其他geom向绘图添加额外数据的最简单方法,是将这些数据作为数据框中的单独列包含进来。这与用于绘制误差线的置信区间列没有什么不同。
  • 要确定线段的水平位置,可以使用分类x变量转换为因子后的数值。正如Hadley所解释的,分类变量在图上仍然具有数字位置。
  • 您可以通过更改在x和xend中加上和减去的数值(当前为0.5)来调整线段的宽度