为ggplot重塑(melt)含重复观测的数据

时间:2014-01-30 06:05:32

标签: r ggplot2

我正在用ggplot绘制一张图,但由于我的数据结构不合理而遇到了很多限制。我一直在学习melt,也取得了一些进展,但今天早些时候遇到了瓶颈,因此发帖求助。

以下是我的数据:

# Example data set (dput() output): 21 rows, one per IndID (1:21).
#   Area         - factor with levels "AAA", "BBB", "CCC"
#   ObsVal       - observed value
#   Pred<k>      - prediction k (One..Four)
#   Pred<k>SE    - standard error paired with prediction k
# NA pattern in the literal below: PredTwo/PredTwoSE are NA for the first 7
# rows (Area "AAA"), PredThree/PredThreeSE for the next 9 rows (Area "BBB"),
# and PredFour/PredFourSE for the last 5 rows (Area "CCC").
# Row names are non-consecutive (1..26 with gaps).
Data <- structure(list(IndID = 1:21, Area = structure(c(1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
3L), .Label = c("AAA", "BBB", "CCC"), class = "factor"), ObsVal = c(1.41284403669725, 
1.62790697674419, 1.75, 1.66666666666667, 2.43870967741935, 1.44827586206897, 
1.14925373134328, 1.97492163009404, 1.3202846975089, 2.16176470588235, 
1.04347826086957, 0.886075949367089, 1.07142857142857, 1.70588235294118, 
0.736842105263158, 0.92831541218638, 2.3489932885906, 1.19298245614035, 
1.77333333333333, 1.5092936802974, 1.09375), PredOne = c(1.79816513761468, 
1.69302325581395, 1.3125, 1.66666666666667, 1.58064516129032, 
1.49655172413793, 1.35820895522388, 1.75548589341693, 1.54448398576512, 
2.26470588235294, 0.971014492753623, 0.974683544303797, 1.19642857142857, 
1.14705882352941, 0.7, 0.92831541218638, 1.69127516778523, 1.63157894736842, 
1.58666666666667, 0.936802973977695, 1.3125), PredOneSE = c(0.223991010436964, 
0.157266749617986, 0.26685283856462, 0.279490729147869, 0.217964053891366, 
0.183190936591734, 0.247680519057338, 0.139883305737923, 0.138095727169982, 
0.229908170921439, 0.0958135248131566, 0.119418717242245, 0.110103432730496, 
0.147639754039624, 0.126097806088354, 0.115507093629184, 0.181420305238205, 
0.0996366549877819, 0.247829674731325, 0.121267372297112, 0.305724781523581
), PredTwo = c(NA, NA, NA, NA, NA, NA, NA, 1.68965517241379, 
1.37010676156584, 1.95588235294118, 1.21739130434783, 1.04113924050633, 
1.16071428571429, 1.79411764705882, 0.921052631578947, 1.07885304659498, 
1.59731543624161, 1.15789473684211, 1.4, 0.884758364312268, 1.09375
), PredTwoSE = c(NA, NA, NA, NA, NA, NA, NA, 0.139757218234836, 
0.139437440563685, 0.230049935329134, 0.100384450568462, 0.120136555331195, 
0.108827344413171, 0.160200199902538, 0.129061017287195, 0.118592278416655, 
0.181692974138931, 0.10539741014593, 0.250638082175034, 0.116275618418844, 
0.328923912088501), PredThree = c(1.15596330275229, 1.5953488372093, 
1.3125, 1.44444444444444, 1.4, 1.30344827586207, 1.25373134328358, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, 1.50335570469799, 1.05263157894737, 
1.30666666666667, 0.83271375464684, 1.09375), PredThreeSE = c(0.217917157594837, 
0.156690895211124, 0.265747120521213, 0.283879122849779, 0.211186596624607, 
0.185731296470196, 0.250576068630065, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, 0.182610106475943, 0.104015282782907, 0.251314706554144, 
0.115476874855973, 0.326509616387012), PredFour = c(1.92660550458716, 
1.72558139534884, 1.53125, 1.66666666666667, 2.21290322580645, 
1.5448275862069, 1.46268656716418, 1.8871473354232, 1.54448398576512, 
2.47058823529412, 1.34782608695652, 1.15189873417722, 1.30357142857143, 
2, 1.03157894736842, 1.17921146953405, NA, NA, NA, NA, NA), PredFourSE = c(0.220294176796081, 
0.15685192799583, 0.203552293443519, 0.282184484298284, 0.232052930820242, 
0.18341190292051, 0.245852887002432, 0.13924415405254, 0.13892122505451, 
0.226280189300754, 0.100441922715917, 0.124327614845589, 0.108785137191018, 
0.16032552882793, 0.12815531922484, 0.117223056201348, NA, NA, 
NA, NA, NA)), .Names = c("IndID", "Area", "ObsVal", "PredOne", 
"PredOneSE", "PredTwo", "PredTwoSE", "PredThree", "PredThreeSE", 
"PredFour", "PredFourSE"), row.names = c(1L, 2L, 3L, 4L, 5L, 
6L, 7L, 8L, 10L, 12L, 13L, 14L, 15L, 16L, 18L, 19L, 21L, 22L, 
24L, 25L, 26L), class = "data.frame")

其头部看起来像这样

IndID Area   ObsVal  PredOne PredOneSE PredTwo PredTwoSE PredThree PredThreeSE PredFour
1     1  AAA 1.412844 1.798165 0.2239910      NA        NA  1.155963   0.2179172 1.926606
2     2  AAA 1.627907 1.693023 0.1572667      NA        NA  1.595349   0.1566909 1.725581
3     3  AAA 1.750000 1.312500 0.2668528      NA        NA  1.312500   0.2657471 1.531250
4     4  AAA 1.666667 1.666667 0.2794907      NA        NA  1.444444   0.2838791 1.666667
5     5  AAA 2.438710 1.580645 0.2179641      NA        NA  1.400000   0.2111866 2.212903
6     6  AAA 1.448276 1.496552 0.1831909      NA        NA  1.303448   0.1857313 1.544828
  PredFourSE
1  0.2202942
2  0.1568519
3  0.2035523
4  0.2821845
5  0.2320529
6  0.1834119

我有21个个体(IndID 1:21),分布在三个不同的区域(AAA:CCC)。对于每个个体,有一个观测值和四个不同的预测,每个预测都有一个SE。在某些情况下,没有(NA)预测或SE(这是故意的)。

使用下面的代码(我知道它又丑又长!):

# Make the prediction and SE columns numeric.
# The original hand-written list converted PredOneSE but skipped PredFourSE,
# even though PredFourSE contains NA. Selecting every column whose name
# starts with "Pred" covers all four predictions and all four SEs, so no
# column can be forgotten. as.numeric() leaves NA as NA.
pred_cols <- grep("^Pred", names(Data), value = TRUE)
Data[pred_cols] <- lapply(Data[pred_cols], as.numeric)

# Make figure
# For each individual: one open-circle point and a +/- 1 SE error bar per
# prediction, plus the observed value drawn as a large asterisk.
# PredOne and ObsVal are colored by Area; PredTwo/Three/Four use fixed colors
# and dashed error bars.
#
# Fixes relative to the first draft:
#   * x is factor(IndID) in every layer (set once in ggplot()), instead of
#     mixing a factor in the first layer with the raw integer in the others.
#   * The legend title is set through scale_color_manual(name = ...). It was
#     previously a labs(fill = ...) call passed as a positional argument to
#     the scale, which targets the wrong aesthetic and is not a valid scale
#     argument.
#   * Removed the stray trailing comma inside aes() for PredTwo.
DataFig <- ggplot(Data, aes(x = factor(IndID))) +
  geom_point(aes(y = PredOne, color = Area), size = 3, shape = 1) +
  geom_errorbar(aes(ymin = PredOne - PredOneSE, ymax = PredOne + PredOneSE,
                    color = Area),
                width = 0.4, linetype = 1, size = 0.75) +
  geom_point(aes(y = PredTwo), color = "red", size = 3, shape = 1) +
  geom_errorbar(aes(ymin = PredTwo - PredTwoSE, ymax = PredTwo + PredTwoSE),
                width = 0.4, color = "red", linetype = 2, size = 0.75) +
  geom_point(aes(y = PredThree), color = "blue", size = 3, shape = 1) +
  geom_errorbar(aes(ymin = PredThree - PredThreeSE,
                    ymax = PredThree + PredThreeSE),
                width = 0.4, color = "blue", linetype = 2, size = 0.75) +
  geom_point(aes(y = PredFour), color = "darkgreen", size = 3, shape = 1) +
  geom_errorbar(aes(ymin = PredFour - PredFourSE,
                    ymax = PredFour + PredFourSE),
                width = 0.4, color = "darkgreen", linetype = 2, size = 0.75) +
  geom_point(aes(y = ObsVal, color = Area), size = 14, shape = "*") +
  scale_color_manual(name = "Study\n Area",
                     values = c("red", "blue", "darkgreen")) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))
DataFig

我可以制作下图:

enter image description here

虽然这接近我想要的,但糟糕的数据结构让我有了局限性。我在尝试着: 1)包含linetype作为aes()参数,以便它们可以包含在图例中。我希望PredOne有一条实线,PredTwo:PredFour有虚线。

2)对点和误差线进行错位(dodge),使它们不重叠。每个区域有三个预测。我想让PredOne(实线)居中,并把该区域的另外两个预测分别错开到它的左右两侧。

我昨天发布了一个类似的问题(可以在这里找到),但无法正确地将@Henrik的(非常有用的)建议应用于具有更多估算值的更大数据集。

有关如何挽救上述代码或更有效地使用melt的任何建议将不胜感激。

1 个答案:

答案 0 :(得分:4)

要回答问题的第一部分,关于数据结构,您必须使用两次melt:

library(reshape2)

# Columns kept as identifiers, and the four prediction columns. The matching
# standard-error columns are named "<prediction>SE".
id_cols <- c("IndID", "Area", "ObsVal")
pred_cols <- c("PredOne", "PredTwo", "PredThree", "PredFour")

# Melt predictions and standard errors separately. Columns are selected by
# name (not position) and the arguments are spelled out in full rather than
# relying on partial matching of `id` / `measure`.
d1 <- melt(Data, id.vars = id_cols, measure.vars = pred_cols)
d2 <- melt(Data, id.vars = id_cols, measure.vars = paste0(pred_cols, "SE"))

# The two long tables are paired row by row, so make sure they line up
# before attaching the SE values.
stopifnot(nrow(d1) == nrow(d2), identical(d1$IndID, d2$IndID))

data.melt <- data.frame(d1, se = d2$value)
head(data.melt)
#   IndID Area   ObsVal variable    value        se
# 1     1  AAA 1.412844  PredOne 1.798165 0.2239910
# 2     2  AAA 1.627907  PredOne 1.693023 0.1572667
# 3     3  AAA 1.750000  PredOne 1.312500 0.2668528
# 4     4  AAA 1.666667  PredOne 1.666667 0.2794907
# 5     5  AAA 2.438710  PredOne 1.580645 0.2179641
# 6     6  AAA 1.448276  PredOne 1.496552 0.1831909

这为您提供了一个数据框,其中有一列variable,用于标识预测的类型(PredOne、…、PredFour),并且预测值(value)和标准误(se)各占单独的一列。

关于第二部分,你可以接近这一点:

# Overlay plot: observed values as larger points, predictions as points
# colored by Area, and value +/- se error bars whose line type depends on the
# prediction (first level solid, the other three dashed).
ggplot(data.melt, aes(x = IndID)) +
  geom_point(aes(y = ObsVal), size = 3) +
  geom_point(aes(y = value, color = Area)) +
  geom_errorbar(
    aes(ymin = value - se, ymax = value + se,
        color = Area, linetype = variable),
    width = 0.5
  ) +
  scale_linetype_manual(values = c(1, 2, 2, 2))

这里的误差线没有错位(dodge),但是ID这么多,错位会让图变得非常拥挤、难以阅读。我建议你考虑改用下面的写法:

  # Faceted alternative: one panel row per prediction type (variable), with
  # observed values, predictions colored by Area, and value +/- se error bars.
  ggplot(data.melt, aes(x = IndID)) +
    geom_point(aes(y = ObsVal), size = 3) +
    geom_point(aes(y = value, color = Area)) +
    geom_errorbar(
      aes(ymin = value - se, ymax = value + se, color = Area),
      width = 0.5
    ) +
    facet_grid(variable ~ .)

编辑(对OP评论的回应)

我想这就是你想要的效果。基本思路是添加一个dodge(偏移量)列,并用它来偏移x的美学映射。

# Horizontal offset per prediction so points/error bars do not overlap.
# PredOne stays centered (0); PredTwo goes left; PredThree goes right;
# PredFour goes left, except in Area "BBB" where it goes right.
# Assigning into the column with a logical index (df$col[cond] <- v) also
# works when a condition matches no rows, unlike df[cond, ]$col <- v.
data.melt$dodge <- 0
data.melt$dodge[data.melt$variable == "PredTwo"] <- -0.2
data.melt$dodge[data.melt$variable == "PredThree"] <- 0.2
data.melt$dodge[data.melt$variable == "PredFour"] <- -0.2
data.melt$dodge[data.melt$variable == "PredFour" &
                  data.melt$Area == "BBB"] <- 0.2
# Dodged plot: observed values at IndID, predictions and their value +/- se
# error bars shifted by the dodge column; one free-scaled panel per Area,
# stacked in a single column, with an x tick at every IndID.
ggplot(data.melt) +
  geom_point(aes(x = IndID, y = ObsVal), size = 3) +
  geom_point(aes(x = IndID + dodge, y = value, color = Area)) +
  geom_errorbar(
    aes(x = IndID + dodge, ymin = value - se, ymax = value + se,
        color = Area, linetype = variable),
    width = 0.1
  ) +
  scale_linetype_manual(values = c(1, 2, 2, 2)) +
  facet_wrap(~ Area, ncol = 1, scales = "free") +
  scale_x_continuous(breaks = unique(data.melt$IndID))