geom_smooth在使用单独的数据框时不绘制拟合平滑线的标准误(SE)区间

时间:2013-12-13 14:22:16

标签: r ggplot2

我想绘制一个“拟合后”的混合模型回归图:画出预测/拟合值,以及每个处理条件下带有标准误(SE)区间的回归线。不幸的是,SE区间没有被画出来(尽管图例中的灰色阴影似乎表明se参数已被处理)。

newdat包含绘制预测值的数据(geom_point); nd包含通过geom_smooth()绘制线条所需的数据。

# Example data used for the points layer, given as dput() output.
# 26 rows with columns v0, treatment (factor with levels "hc"/"nhc"), cse,
# dv280 (stored as a 26 x 1 matrix, see its .Dim attribute), and the interval
# bounds plo/phi and tlo/thi. The row names skip 9 and 10.
newdat <- structure(list(v0 = c(55L, 90L, 30L, 23L, 74L, 48L, 25L, 25L, 
60L, 69L, 55L, 41L, 34L, 41L, 53L, 76L, 72L, 64L, 34L, 37L, 75L, 
21L, 26L, 14L, 24L, 19L), treatment = structure(c(2L, 1L, 1L, 
2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 
1L, 2L, 1L, 1L, 2L, 2L, 1L), .Label = c("hc", "nhc"), class = "factor"), 
    cse = c(2, 2, 6, 6, -4, -4, 5, 5, -4, -4, -3, -3, -2, -2, 
    3, 3, 2, 2, -4, -4, -7, -7, 4, 4, 2, 2), dv280 = structure(c(28.5954553607209, 
    29.0010807407473, 0.820231380215773, 3.35865456461513, 49.8359456217717, 
    24.461804847022, 6.23032836368822, 0.772936154511909, 41.8150506885472, 
    31.9089377911506, 25.2183508293096, 29.203718756273, 23.4674396239055, 
    18.5277638674685, 14.154110078194, 38.9009660948022, 22.6178239314942, 
    33.7517449606509, 26.9191029554161, 20.5609256858118, 55.5863616856965, 
    20.0644146304084, 2.85339319855906, 1.65402829619576, 10.8349022942953, 
    3.82267888202684), .Dim = c(26L, 1L), .Dimnames = list(c("1", 
    "2", "3", "4", "5", "6", "7", "8", "11", "12", "13", "14", 
    "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", 
    "25", "26", "27", "28"), NULL)), plo = c(18.2940632968672, 
    8.70682874092615, -9.57004073754051, -7.05295432875793, 35.2691733515267, 
    14.2687966060566, -3.12208622604343, -8.52627071371677, 30.1788256344375, 
    18.2506947724591, 14.8705702665525, 20.3644901882128, 15.3980231727933, 
    10.4235410902273, 3.52894178176158, 22.3750340692014, 7.67201979003711, 
    21.2004011925819, 16.8945364920955, 10.6654316626679, 39.1117560188314, 
    4.71896161593837, -5.54649636719771, -8.03839072475669, 3.25706574634023, 
    -4.38303434571468), phi = c(38.8968474245745, 49.2953327405684, 
    11.2105034979721, 13.7702634579882, 64.4027178920167, 34.6548130879875, 
    15.5827429534199, 10.0721430227406, 53.4512757426569, 45.5671808098421, 
    35.5661313920666, 38.0429473243332, 31.5368560750176, 26.6319866447097, 
    24.7792783746264, 55.4268981204031, 37.5636280729513, 46.3030887287198, 
    36.9436694187367, 30.4564197089558, 72.0609673525617, 35.4098676448784, 
    11.2532827643158, 11.3464473171482, 18.4127388422504, 12.0283921097684
    ), tlo = c(18.2877068225676, 8.70360144639113, -9.57634287064189, 
    -7.05924355454202, 35.2646774598802, 14.2623725847359, -3.12908722334489, 
    -8.53331173874155, 30.1731979587424, 18.2458999214011, 14.8642422705033, 
    20.3570830595245, 15.3899100922942, 10.4154628193239, 3.52277889155111, 
    22.371071031997, 7.6676378822382, 21.1951836536363, 16.8880045983016, 
    10.6588146263129, 39.1077806378248, 4.71469379607788, -5.55429056032973, 
    -8.04514630529966, 3.24842694535383, -4.39101280006747), 
    thi = c(38.9032038988741, 49.2985600351034, 11.2168056310734, 
    13.7765526837723, 64.4072137836632, 34.6612371093081, 15.5897439507213, 
    10.0791840477654, 53.456903418352, 45.5719756609001, 35.5724593881158, 
    38.0503544530215, 31.5449691555168, 26.6400649156131, 24.7854412648369, 
    55.4308611576074, 37.5680099807502, 46.3083062676655, 36.9502013125306, 
    30.4630367453107, 72.0649427335683, 35.4141354647389, 11.2610769574479, 
    11.3532028976912, 18.4213776432368, 12.0363705641212)), .Names = c("v0", 
"treatment", "cse", "dv280", "plo", "phi", "tlo", "thi"), row.names = c(1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 
18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L), class =

 "data.frame")
    # Example data passed to the geom_smooth() layer, given as dput() output.
    # 26 rows with columns v0, treatment (factor with levels "hc"/"nhc"), cse
    # (0 in every row), dv280 (stored as a 26 x 1 matrix), and the interval
    # bounds plo/phi and tlo/thi. The row names skip 9 and 10.
    nd <- structure(list(v0 = c(55L, 90L, 30L, 23L, 74L, 48L, 25L, 25L, 
60L, 69L, 55L, 41L, 34L, 41L, 53L, 76L, 72L, 64L, 34L, 37L, 75L, 
21L, 26L, 14L, 24L, 19L), treatment = structure(c(2L, 1L, 1L, 
2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 
1L, 2L, 1L, 1L, 2L, 2L, 1L), .Label = c("hc", "nhc"), class = "factor"), 
    cse = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0), dv280 = structure(c(32.0471186922315, 
    32.4527440722579, 11.1752213747477, 13.713644559147, 42.9326189587504, 
    17.5584781840008, 14.8594866924648, 9.4020944832885, 34.911724025526, 
    25.0056111281293, 20.0408558320436, 24.0262237590071, 20.0157762923948, 
    15.0761005359579, 19.3316050754599, 44.0784610920682, 26.0694872630048, 
    37.2034082921615, 20.0157762923948, 13.6575990227905, 43.5055400254093, 
    7.98359297012116, 9.75671986158034, 8.55735495921703, 14.2865656258059, 
    7.27434221353748), .Dim = c(26L, 1L), .Dimnames = list(c("1", 
    "2", "3", "4", "5", "6", "7", "8", "11", "12", "13", "14", 
    "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", 
    "25", "26", "27", "28"), NULL)), plo = c(22.5072031474275, 
    13.0500664185171, 4.05863186267882, 5.77259649335957, 28.5121051842211, 
    9.29736790581986, 7.2048329083037, 1.78971324244184, 24.2096188947274, 
    11.7540646048434, 10.363946079095, 16.7677263682142, 13.0339097497873, 
    7.77962797988299, 10.0943827426394, 29.0914605330986, 11.9778881852231, 
    25.4951576099086, 13.0339097497873, 6.63425372645495, 28.8025761975293, 
    -0.238742065354621, 2.26914358668319, -1.1076157441286, 6.49404176281806, 
    -1.31100367364568), phi = c(41.5870342370355, 51.8554217259987, 
    18.2918108868166, 21.6546926249345, 57.3531327332797, 25.8195884621816, 
    22.5141404766259, 17.0144757241352, 45.6138291563245, 38.2571576514153, 
    29.7177655849923, 31.2847211497999, 26.9976428350024, 22.3725730920328, 
    28.5688274082805, 59.0654616510377, 40.1610863407866, 48.9116589744144, 
    26.9976428350024, 20.6809443191261, 58.2085038532893, 16.2059280055969, 
    17.2442961364775, 18.2223256625627, 22.0790894887938, 15.8596881007206
    ), tlo = c(22.500339650347, 13.046690851483, 4.0494338564546, 
    5.76435239847513, 28.5075637077657, 9.28944290164532, 7.19628070358881, 
    1.78111359966419, 24.2035002837825, 11.7491226514857, 10.3571796788833, 
    16.7587079630338, 13.024534493447, 7.77065645403329, 10.0872945063297, 
    29.087090666816, 11.9732407398515, 25.4895645373964, 13.024534493447, 
    6.62493376528452, 28.7981219380453, -0.246704406534527, 2.26040076852036, 
    -1.1143904969611, 6.4856407045028, -1.3186296516059), thi = c(41.593897734116, 
    51.8587972930328, 18.3010088930408, 21.6629367198189, 57.3576742097352, 
    25.8275134663562, 22.5226926813408, 17.0230753669128, 45.6199477672694, 
    38.2620996047729, 29.7245319852039, 31.2937395549803, 27.0070180913427, 
    22.3815446178825, 28.5759156445901, 59.0698315173204, 40.1657337861582, 
    48.9172520469266, 27.0070180913427, 20.6902642802966, 58.2129581127733, 
    16.2138903467768, 17.2530389546403, 18.2291004153952, 22.0874905471091, 
    15.8673140786809)), .Names = c("v0", "treatment", "cse", 
"dv280", "plo", "phi", "tlo", "thi"), row.names = c(1L, 2L, 3L, 
4L, 5L, 6L, 7L, 8L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 
20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L), class = "data.frame")
# Base plot: the values in `newdat` as points, coloured by treatment, with a
# per-treatment lm smoother that is fitted to the separate data frame `nd`.
p <- ggplot(newdat, aes(x = v0, y = dv280, colour = treatment)) +
  geom_point() +
  geom_smooth(data = nd, method = "lm", se = TRUE) +
  scale_colour_discrete(
    guide = guide_legend(title.position = "left", title.hjust = 1)
  )

# Display the plot with the theme applied, the visible x range set to
# [-20, 100], and dashed reference lines at y = 0 and x = 0.
# NOTE(review): `.mytheme` is not defined in this snippet; presumably a
# user-defined theme object.
p +
  .mytheme +
  coord_cartesian(xlim = c(-20, 100)) +
  geom_hline(yintercept = 0, colour = "gray35", linetype = "dashed") +
  geom_vline(xintercept = 0, colour = "gray35", linetype = "dashed")

这一切都很好,但不幸的是,SE没有显示出来:

enter image description here

我不明白为什么SE被丢弃(或者可能被其他东西覆盖,因为图例似乎表明看到并识别出SE参数)。

生成newdat和nd的代码如下,二者的主要区别在于nd中cse的值被设置为零。

# Mixed model: fixed effects for the v0-by-treatment slopes and for cse, plus
# a random slope on v0 (no random intercept) for each level of `pp`.
m <- lmer(
  dv280 ~ 1 + v0:treatment + cse + (0 + v0 | pp),
  data = dat, REML = TRUE
)

# Add fixed-effect predictions and +/- 2 SE limits to a prediction grid.
#
# grid:  data frame holding the predictors v0, treatment and cse, plus a
#        placeholder dv280 column so the full model formula can be evaluated
#        when the model matrix is built.
# model: the fitted lmer model.
#
# Returns `grid` with dv280 replaced by the fixed-effect prediction and four
# added columns:
#   plo / phi  limits from the fixed-effect uncertainty only
#   tlo / thi  limits that also include the random-slope variance
add_prediction_limits <- function(grid, model) {
  mm <- model.matrix(terms(model), grid)
  # `%*%` returns an n x 1 matrix; as.vector() stores the prediction as a
  # plain numeric column instead of a matrix column.
  grid$dv280 <- as.vector(mm %*% fixef(model))
  pvar <- diag(mm %*% tcrossprod(vcov(model), mm))
  # The only random effect is a slope on v0, so its contribution to the
  # variance at a given row is v0^2 * Var(slope), not Var(slope) itself.
  tvar <- pvar + grid$v0^2 * VarCorr(model)$pp[1]
  half_fixed <- 2 * sqrt(pvar)
  half_total <- 2 * sqrt(tvar)
  data.frame(
    grid,
    plo = grid$dv280 - half_fixed,
    phi = grid$dv280 + half_fixed,
    tlo = grid$dv280 - half_total,
    thi = grid$dv280 + half_total
  )
}

# Predictions at the observed covariate values (rows 9 and 10 are excluded).
newdat <- data.frame(
  v0 = dat$v0,
  treatment = dat$treatment,
  cse = dat$cse,
  dv280 = 0
)
newdat <- add_prediction_limits(newdat[-c(9, 10), ], m)

# Same grid, but with cse fixed at 0 for every row.
nd <- data.frame(
  v0 = dat$v0,
  treatment = dat$treatment,
  cse = 0,
  dv280 = 0
)
nd <- add_prediction_limits(nd[-c(9, 10), ], m)

2 个答案:

答案 0 :(得分:3)

在您的代码中,

# The plot code from the question, quoted unchanged: the points layer uses
# `newdat`, while geom_smooth() fits its lm on the separate data frame `nd`.
p <- ggplot(data=newdat, mapping=aes(x=v0, y=dv280, colour=treatment)) +
  geom_point() +
  geom_smooth(data=nd, method='lm', se=TRUE) +
  scale_colour_discrete(guide=guide_legend(title.position='left', title.hjust=1))

您对geom_point(...)使用newdat作为数据集,而对geom_smooth(...)使用nd。问题在于nd包含的显然是拟合值(即预测值),因此nd中的“数据”相对于拟合直线的离散程度为0。您可以运行以下代码来验证:
# Diagnostic version of the plot: the points are drawn from `nd` too, so the
# values that geom_smooth() fits are shown together with the lm line.
p <- ggplot(nd, aes(x = v0, y = dv280, colour = treatment)) +
  geom_point() +
  geom_smooth(data = nd, method = "lm", se = TRUE) +
  scale_colour_discrete(
    guide = guide_legend(title.position = "left", title.hjust = 1)
  )

nd中的点完全位于一条线上,因此se=0。如果使用nd的目的是使用您的某些数据子集来计算lm,那么请将其作为数据集提供给geom_smooth(...)。例如,下面的代码绘制了所有点,但lm是在dv280 > 5的子集上完成的:

# All rows of `newdat` are drawn as points, but the lm smoother is fitted
# only to the subset of rows with dv280 > 5.
p <- ggplot(newdat, aes(x = v0, y = dv280, colour = treatment)) +
  geom_point() +
  geom_smooth(data = subset(newdat, dv280 > 5), method = "lm", se = TRUE) +
  scale_colour_discrete(
    guide = guide_legend(title.position = "left", title.hjust = 1)
  )

修改:回应OP的评论。

看起来您是在用lmer模型拟合dat$dv280,所以实际数据位于dat中。另一方面,newdat$dv280是根据模型参数以及包含v0、treatment和cse的模型矩阵得到的预测值。此外,nd$dv280是在cse=0的条件下根据v0和treatment得到的预测值。因此,nd$dv280对v0、按treatment分组所作的图完全是线性的,这并不奇怪。您的ggplot代码中没有任何地方绘制了实际数据(例如dat$dv280)。图中看似存在的离散仅仅是cse的影响。所以在这一点上,我不确定您想用这张图来说明什么(??)。

答案 1 :(得分:0)

如果我正确理解了这个问题,我认为以下代码可能会让您走上正轨。您可以显式地指定标准误:

# Simple linear fit of dv280 on v0. The response is coerced with as.numeric()
# because dv280 is stored as a one-column matrix in the question's dput();
# a matrix response makes lm() return a multivariate ("mlm") fit, whose
# predict() method does not support se.fit.
model <- lm(as.numeric(dv280) ~ v0, data = newdat)

# Fitted values and their standard errors. The argument is spelled out as
# `se.fit` rather than relying on partial matching of `se`.
err <- stats::predict(model, newdata = newdat, se.fit = TRUE)
newdat$fit <- err$fit
newdat$ucl <- err$fit + 1.96 * err$se.fit
newdat$lcl <- err$fit - 1.96 * err$se.fit

# stat = "identity" makes geom_smooth() draw the supplied fit/lcl/ucl columns.
# With the default stat = "smooth" the band is recomputed and the mapped
# ymin/ymax are ignored. inherit.aes = FALSE keeps the single overall fit from
# being split by the colour = treatment grouping of the points layer.
ggplot(newdat, aes(x = v0, y = dv280, colour = treatment)) +
  geom_point() +
  geom_smooth(
    aes(x = v0, y = fit, ymin = lcl, ymax = ucl),
    data = newdat, stat = "identity", inherit.aes = FALSE
  )

这将给出下图:

enter image description here