我在ggplot中创建了一张图,但由于我的数据结构不正确而遇到了很多限制。我一直在学习melt
,并取得了一定的进展,但是今天早些时候遇到了瓶颈,因此发了这个帖子......
以下是我的数据:
# Example data set in wide format (dput() output): 21 rows, one per IndID.
#   IndID       - integer identifier, 1 to 21
#   Area        - factor with levels "AAA", "BBB", "CCC"
#   ObsVal      - observed value
#   PredOne ... PredFour     - four predictions per row
#   PredOneSE ... PredFourSE - the SE column paired with each prediction
# PredTwo/PredTwoSE are NA for the "AAA" rows, PredThree/PredThreeSE for the
# "BBB" rows and PredFour/PredFourSE for the "CCC" rows.
# The row names are not contiguous (9, 11, 17, 20 and 23 are absent).
Data <- structure(list(IndID = 1:21, Area = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L), .Label = c("AAA", "BBB", "CCC"), class = "factor"), ObsVal = c(1.41284403669725,
1.62790697674419, 1.75, 1.66666666666667, 2.43870967741935, 1.44827586206897,
1.14925373134328, 1.97492163009404, 1.3202846975089, 2.16176470588235,
1.04347826086957, 0.886075949367089, 1.07142857142857, 1.70588235294118,
0.736842105263158, 0.92831541218638, 2.3489932885906, 1.19298245614035,
1.77333333333333, 1.5092936802974, 1.09375), PredOne = c(1.79816513761468,
1.69302325581395, 1.3125, 1.66666666666667, 1.58064516129032,
1.49655172413793, 1.35820895522388, 1.75548589341693, 1.54448398576512,
2.26470588235294, 0.971014492753623, 0.974683544303797, 1.19642857142857,
1.14705882352941, 0.7, 0.92831541218638, 1.69127516778523, 1.63157894736842,
1.58666666666667, 0.936802973977695, 1.3125), PredOneSE = c(0.223991010436964,
0.157266749617986, 0.26685283856462, 0.279490729147869, 0.217964053891366,
0.183190936591734, 0.247680519057338, 0.139883305737923, 0.138095727169982,
0.229908170921439, 0.0958135248131566, 0.119418717242245, 0.110103432730496,
0.147639754039624, 0.126097806088354, 0.115507093629184, 0.181420305238205,
0.0996366549877819, 0.247829674731325, 0.121267372297112, 0.305724781523581
), PredTwo = c(NA, NA, NA, NA, NA, NA, NA, 1.68965517241379,
1.37010676156584, 1.95588235294118, 1.21739130434783, 1.04113924050633,
1.16071428571429, 1.79411764705882, 0.921052631578947, 1.07885304659498,
1.59731543624161, 1.15789473684211, 1.4, 0.884758364312268, 1.09375
), PredTwoSE = c(NA, NA, NA, NA, NA, NA, NA, 0.139757218234836,
0.139437440563685, 0.230049935329134, 0.100384450568462, 0.120136555331195,
0.108827344413171, 0.160200199902538, 0.129061017287195, 0.118592278416655,
0.181692974138931, 0.10539741014593, 0.250638082175034, 0.116275618418844,
0.328923912088501), PredThree = c(1.15596330275229, 1.5953488372093,
1.3125, 1.44444444444444, 1.4, 1.30344827586207, 1.25373134328358,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 1.50335570469799, 1.05263157894737,
1.30666666666667, 0.83271375464684, 1.09375), PredThreeSE = c(0.217917157594837,
0.156690895211124, 0.265747120521213, 0.283879122849779, 0.211186596624607,
0.185731296470196, 0.250576068630065, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 0.182610106475943, 0.104015282782907, 0.251314706554144,
0.115476874855973, 0.326509616387012), PredFour = c(1.92660550458716,
1.72558139534884, 1.53125, 1.66666666666667, 2.21290322580645,
1.5448275862069, 1.46268656716418, 1.8871473354232, 1.54448398576512,
2.47058823529412, 1.34782608695652, 1.15189873417722, 1.30357142857143,
2, 1.03157894736842, 1.17921146953405, NA, NA, NA, NA, NA), PredFourSE = c(0.220294176796081,
0.15685192799583, 0.203552293443519, 0.282184484298284, 0.232052930820242,
0.18341190292051, 0.245852887002432, 0.13924415405254, 0.13892122505451,
0.226280189300754, 0.100441922715917, 0.124327614845589, 0.108785137191018,
0.16032552882793, 0.12815531922484, 0.117223056201348, NA, NA,
NA, NA, NA)), .Names = c("IndID", "Area", "ObsVal", "PredOne",
"PredOneSE", "PredTwo", "PredTwoSE", "PredThree", "PredThreeSE",
"PredFour", "PredFourSE"), row.names = c(1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 10L, 12L, 13L, 14L, 15L, 16L, 18L, 19L, 21L, 22L,
24L, 25L, 26L), class = "data.frame")
其头部看起来像这样
IndID Area ObsVal PredOne PredOneSE PredTwo PredTwoSE PredThree PredThreeSE PredFour
1 1 AAA 1.412844 1.798165 0.2239910 NA NA 1.155963 0.2179172 1.926606
2 2 AAA 1.627907 1.693023 0.1572667 NA NA 1.595349 0.1566909 1.725581
3 3 AAA 1.750000 1.312500 0.2668528 NA NA 1.312500 0.2657471 1.531250
4 4 AAA 1.666667 1.666667 0.2794907 NA NA 1.444444 0.2838791 1.666667
5 5 AAA 2.438710 1.580645 0.2179641 NA NA 1.400000 0.2111866 2.212903
6 6 AAA 1.448276 1.496552 0.1831909 NA NA 1.303448 0.1857313 1.544828
PredFourSE
1 0.2202942
2 0.1568519
3 0.2035523
4 0.2821845
5 0.2320529
6 0.1834119
我有21个个体(IndID 1:21),分布在三个不同的区域(AAA:CCC)。对于每个个体,有一个观测值和四个不同的预测值,每个预测值都有一个标准误(SE)。在某些情况下,预测值或SE缺失(NA),这是有意为之。
使用下面的代码(我知道它又丑又长!):
# Load ggplot2 explicitly so the snippet runs in a fresh session.
library(ggplot2)

# Make every prediction and SE column numeric. as.numeric() leaves columns
# that are already double unchanged, so converting all "Pred*" columns
# (instead of a hand-picked subset) is safe for this data.
pred_cols <- grep("^Pred", names(Data), value = TRUE)
Data[pred_cols] <- lapply(Data[pred_cols], as.numeric)

# Build the figure.
# x is declared once as factor(IndID) so that every layer shares the same
# discrete axis instead of mixing factor(IndID) and IndID between layers.
DataFig <- ggplot(Data, aes(x = factor(IndID))) +
  # PredOne: coloured by Area, solid error bars.
  geom_point(aes(y = PredOne, color = Area), size = 3, shape = 1) +
  geom_errorbar(
    aes(ymin = PredOne - PredOneSE, ymax = PredOne + PredOneSE, color = Area),
    width = 0.4, linetype = 1, size = 0.75
  ) +
  # PredTwo: fixed red, dashed error bars.
  geom_point(aes(y = PredTwo), color = "red", size = 3, shape = 1) +
  geom_errorbar(
    aes(ymin = PredTwo - PredTwoSE, ymax = PredTwo + PredTwoSE),
    width = 0.4, color = "red", linetype = 2, size = 0.75
  ) +
  # PredThree: fixed blue, dashed error bars.
  geom_point(aes(y = PredThree), color = "blue", size = 3, shape = 1) +
  geom_errorbar(
    aes(ymin = PredThree - PredThreeSE, ymax = PredThree + PredThreeSE),
    width = 0.4, color = "blue", linetype = 2, size = 0.75
  ) +
  # PredFour: fixed dark green, dashed error bars.
  geom_point(aes(y = PredFour), color = "darkgreen", size = 3, shape = 1) +
  geom_errorbar(
    aes(ymin = PredFour - PredFourSE, ymax = PredFour + PredFourSE),
    width = 0.4, color = "darkgreen", linetype = 2, size = 0.75
  ) +
  # Observed value drawn as a large asterisk, coloured by Area.
  geom_point(aes(y = ObsVal, color = Area), size = 14, shape = "*") +
  # The legend title belongs in `name`; the original passed a labs(fill = ...)
  # object positionally, which does not title the colour legend.
  scale_color_manual(
    name = "Study\n Area",
    values = c("red", "blue", "darkgreen")
  ) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))
DataFig
我可以制作下图:
虽然这已经接近我想要的效果,但糟糕的数据结构给我带来了限制。我想要做到:
1)包含linetype
作为aes()
参数,以便它们可以包含在图例中。我希望PredOne有一条实线,PredTwo:PredFour有虚线。
2)对点和误差线进行dodge
(错位排列),使它们不重叠。每个区域有三个预测。我想把PredOne(实线)放在中间,并把该区域的另外两个预测分别dodge
到它的左右两侧。
我昨天发布了一个类似的问题(可在此处找到),但无法正确地将@Henrik的(非常有用的)建议应用于这个包含更多估计值的更大数据集。
有关如何挽救上述代码或更有效地使用melt
的任何建议将不胜感激。
答案 0 :(得分:4)
要回答问题的第一部分(关于数据结构),您必须使用两次melt:
library(reshape2)

# Columns are selected by name rather than by position, and the melt()
# arguments are spelled out in full instead of relying on partial matching.
id_cols <- c("IndID", "Area", "ObsVal")
pred_cols <- c("PredOne", "PredTwo", "PredThree", "PredFour")
se_cols <- paste0(pred_cols, "SE")

# Long format: one row per (IndID, prediction type).
d1 <- melt(Data, id.vars = id_cols, measure.vars = pred_cols)
# Same reshape for the matching standard errors.
d2 <- melt(Data, id.vars = id_cols, measure.vars = se_cols)

# Guard: both melts must be row-aligned before the SE column is attached.
stopifnot(
  identical(d1$IndID, d2$IndID),
  identical(as.character(d2$variable), paste0(d1$variable, "SE"))
)

# Combine predictions (value) with their standard errors (se).
data.melt <- data.frame(d1, se = d2$value)
head(data.melt)
# IndID Area ObsVal variable value se
# 1 1 AAA 1.412844 PredOne 1.798165 0.2239910
# 2 2 AAA 1.627907 PredOne 1.693023 0.1572667
# 3 3 AAA 1.750000 PredOne 1.312500 0.2668528
# 4 4 AAA 1.666667 PredOne 1.666667 0.2794907
# 5 5 AAA 2.438710 PredOne 1.580645 0.2179641
# 6 6 AAA 1.448276 PredOne 1.496552 0.1831909
这为您提供了一个数据框,其中包含一列variable
,用于标识预测的类型(PredOne
到PredFour
),并且预测值(value)和标准误(se)各占一列。
关于第二部分,你可以接近这一点:
# Observed values plus all predictions on one panel. Error bars take their
# colour from Area and their line type from the prediction type: the first
# level of `variable` is drawn solid (1), the remaining three dashed (2).
ggplot(data = data.melt, mapping = aes(x = IndID)) +
  geom_point(mapping = aes(y = ObsVal), size = 3) +
  geom_point(mapping = aes(y = value, colour = Area)) +
  geom_errorbar(
    mapping = aes(
      ymin = value - se,
      ymax = value + se,
      colour = Area,
      linetype = variable
    ),
    width = 0.5
  ) +
  scale_linetype_manual(values = c(1, 2, 2, 2))
这里的误差线没有错开(dodge),但是ID这么多,错开之后会得到一张非常拥挤、令人困惑的图。我强烈建议你考虑改用下面的方式:
# Alternative layout: one facet row per prediction type, so the error bars
# never overlap and no dodging is required.
ggplot(data = data.melt, mapping = aes(x = IndID)) +
  geom_point(mapping = aes(y = ObsVal), size = 3) +
  geom_point(mapping = aes(y = value, colour = Area)) +
  geom_errorbar(
    mapping = aes(ymin = value - se, ymax = value + se, colour = Area),
    width = 0.5
  ) +
  facet_grid(variable ~ .)
编辑(对OP评论的回应)
我想这就是你想要的效果。基本思路是添加一个dodge(偏移)列,并用它来偏移x的图形属性映射。
# Horizontal offset for each prediction so points and error bars sit side by
# side: PredOne stays centred (0), PredTwo goes left (-0.2), PredThree goes
# right (+0.2). PredFour goes left, except in Area "BBB" where it goes right.
pred_type <- as.character(data.melt$variable)
four_in_bbb <- pred_type == "PredFour" & data.melt$Area == "BBB"
shift_left <- pred_type %in% c("PredTwo", "PredFour") & !four_in_bbb
shift_right <- pred_type == "PredThree" | four_in_bbb
data.melt$dodge <- ifelse(shift_left, -0.2, ifelse(shift_right, 0.2, 0))
# One panel per Area. Observed values stay at the integer IndID position,
# while predictions and their error bars are shifted by the `dodge` column.
ggplot(data = data.melt, mapping = aes(x = IndID)) +
  geom_point(mapping = aes(y = ObsVal), size = 3) +
  geom_point(mapping = aes(x = IndID + dodge, y = value, colour = Area)) +
  geom_errorbar(
    mapping = aes(
      x = IndID + dodge,
      ymin = value - se,
      ymax = value + se,
      colour = Area,
      linetype = variable
    ),
    width = 0.1
  ) +
  scale_linetype_manual(values = c(1, 2, 2, 2)) +
  facet_wrap(~Area, scales = "free", ncol = 1) +
  # Put an axis break at every individual ID.
  scale_x_continuous(breaks = unique(data.melt[["IndID"]]))