Question

试图弄清楚如何直观地展示调整后的方法在ANCOVA中的工作原理。在主要文献中有一些很好的已发布示例，但是我无法使用ggplot2复制它们的可视化。我要复制的示例：

Packard and Boardman 1999 (Figure 2)和Barrett 2011 (Figure 1)

library(ggplot2)
library(grid)
library(emmeans)
library(HH)
library(multcomp)

Example using 'litter' data:

data(litter)

gest.mean <- mean(litter$gesttime) #mean of the covariate

model1 <- lm(weight ~ gesttime * dose, data=litter)
pred1 <- predict(model1)

model2 <- lm(weight ~ gesttime + dose, data=litter)
pred2 <- predict(model2)

#plot different slopes
plot1 <- ggplot(data = cbind(litter, pred1),
   aes(gesttime, weight, color=dose)) + geom_point() +
geom_line(aes(y=pred1))+  #plots the predicted values (fitted line)
geom_vline(xintercept = gest.mean, linetype="dashed")+
labs(title = "Model1: Separate Slopes ANCOVA", subtitle = "model1 <- 
lm(weight ~ gesttime * dose, data=litter)")

#plot same slopes
plot2 <- ggplot(data = cbind(litter, pred2),
   aes(gesttime, weight, color=dose)) + geom_point() +
geom_line(aes(y=pred2))+
geom_vline(xintercept = gest.mean, linetype="dashed")+
labs(title = "Model2: Equal Slopes ANCOVA", subtitle = "model2 <- lm(weight ~ 
gesttime + dose, data=litter)")

#dashed vertical line shows the mean of covariate
#emmeans are calculated by adjusting points to mean of covariate along group specific slope

grid.newpage()
grid.draw(rbind(ggplotGrob(plot1), ggplotGrob(plot2), size = "last"))

summary(model1)
aov(model1)
summary(model2)
aov(model2)

#compare fits of model with interaction (sep. slopes) vs. model without (eq. slopes)
anova(model1,model2)

#EMmean post hocs to compare differences among four treatments at the grand mean of the covariate
#same as comparing intercepts when slopes are equal

#calculate model1 estimated marginal means (using interaction)
model1.emm <- emmeans(model1, "dose") #note that is gives warning message because sep slopes (interaction)
pairs(model1.emm)

#compare model1 marginal means (LS means)
plot(model1.emm, comparisons = TRUE)
CLD(model1.emm)

#calculate model2 estimated marginal means
model2.emm <- emmeans(model2, "dose")
pairs(model2.emm)

#compare model2 marginal means (LS means)
plot(model2.emm, comparisons = TRUE)
CLD(model2.emm)

#Just to show how EM means are used (intersect grand mean of covariate) 
plot3 <- ggplot(data = cbind(litter, pred2),
            aes(gesttime, weight, color=dose)) + geom_point() +
geom_line(aes(y=pred2))+
geom_vline(xintercept = gest.mean)+
geom_hline(yintercept = 28.87, linetype="dashed", color=c(1))+
geom_hline(yintercept = 29.33, linetype="dashed")+
geom_hline(yintercept = 30.56, linetype="dashed")+
geom_hline(yintercept = 32.35, linetype="dashed")+
labs(title = "Model2: Equal Slopes ANCOVA")

plot3

plot4 <-plot3 +
geom_segment(mapping=aes(x=gesttime, xend=gesttime+0.5, y=weight, 
yend=weight+0.5, colour = "dose"), arrow=arrow(), size=.25, color="blue")

plot4 
#obviously not what I wanted; individuals are not connected to mean of covariate (gesttime=22.08) along group-specific slope (sep. slopes) or common slope (eq. slopes)

Answer 1

你可以做

library(emmeans)
plt = emmip(model2, dose ~ gesttime,
    cov.reduce = range)

到目前为止，您已经拥有一个带有拟合线的ggplot对象。现在，获取调整后的平均值。

emmdat = as.data.frame(emmeans(model2, ~ dose*gesttime))

该数据框具有您需要绘制的预测值和EMM。添加适当的ggplot()代码以将这些点绘制到plt，并显示结果。

同样，使用model1，将说明您为什么会收到警告！调整后的平均值在不同的妊娠时间进行比较。

Answer 2

以下是复制Barrett 2011 ANCOVA图的代码（图1）。我遵循首先拟合交互作用（单独的斜率）并删除不重要的交互作用，以使用相等的斜率拟合调整后的值和调整后的均值（LS均值或EM均值）来生成最小适当模型的过程。

library(ggplot2)
library(dplyr)
library(grid)

#extract data from the Barrett 2011 paper
X <- c(11,21,30,41,52,65,71,77,8,17,29,42,51,64,72,79)
Y <- c(33,32,38,49,51,53,59,65,20,22,31,28,42,52,48,55)
Group <- as.factor(c(1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2))
data <-data.frame(X,Y,Group)

X.mean <- mean(data$X) #mean of the covariate

model1 <- lm(Y ~ X * Group, data=data)
pred1 <- predict(model1)

model2 <- lm(Y ~ X + Group, data=data)
pred2 <- predict(model2)

#plot different slopes
plot1 <- ggplot(data = cbind(data, pred1),
                aes(X, Y, color=Group)) + geom_point() +
  geom_line(aes(y=pred1))+  #plots the predicted values (fitted line)
  geom_vline(xintercept = X.mean, linetype="dashed", alpha = 0.15)+
  labs(title = "Model1: Separate Slopes ANCOVA", subtitle = "model1 <- lm(Y ~ X * Group, data=data)")+
  theme_classic()

#plot same slopes
plot2 <- ggplot(data = cbind(data, pred2),
                aes(X, Y, color=Group)) + geom_point() +
  geom_line(aes(y=pred2))+
  geom_vline(xintercept = X.mean, linetype="dashed", alpha = 0.15)+
  labs(title = "Model2: Equal Slopes ANCOVA", subtitle = "model2 <- lm(Y ~ X + Group, data=data)")+
  theme_classic()

grid.newpage()
grid.draw(rbind(ggplotGrob(plot1), ggplotGrob(plot2), size = "last"))

summary(model1)
anova(model1)
summary(model2)
anova(model2)
anova(model1, model2) #no sig. difference, drop interaction term and use simplest model (equal slopes)

plot3 <- ggplot(data = cbind(data, pred2),
                aes(X, Y, color=Group)) + 
  geom_point()+
  geom_line(aes(y=pred2))+
  #geom_vline(xintercept = X.mean, linetype="dashed", alpha = 0.45)+
  labs(title = "Model2: Equal Slopes ANCOVA", subtitle = "model2 <- lm(Y ~ X + Group, data=data)")+
  theme_classic()
plot3

#mutate to calc adjusted values of individuals
data <- data%>%mutate(adjY=Y-0.498*(X-X.mean)) 
#0.498 is the 'common slope' of model2; equal slopes ANCOVA 

plot4 <- ggplot(data = cbind(data, pred2),
                aes(X, Y, color=Group)) + 
  geom_point()+
  #geom_line(aes(y=pred2))+
  geom_vline(xintercept = X.mean, linetype="dashed", alpha = 0.45)+
  #labs(title = "Model2: Equal Slopes ANCOVA", subtitle = "model2 <- lm(Y ~ X + Group, data=data)")+
  geom_segment(aes(x=X, xend=X.mean, y=Y, yend=data$adjY), size=.25)+
  theme_classic()
plot4 

plot5 <-ggplot(data, aes(x=Group, y=adjY, color=Group))+
  geom_point()+
  stat_summary(geom="point", fun.y= "mean", shape = 8, color="black", size=5)+
  geom_hline(yintercept = mean(data$adjY), linetype="dashed", alpha = 0.45)+
  theme_classic()
plot5

grid.newpage()
grid.draw(rbind(ggplotGrob(plot3), ggplotGrob(plot4), ggplotGrob(plot5), size = "last"))

Barrett 2011 Figure 1

在ANCOVA（ggplot2）中可视化调整后的均值（LS均值或估计边际均值）过程

2 个答案: