Question

我试图在R中绘制两个连续变量之间的交互作用。但是，我的数据是多级的（人嵌套在天内），所以绘图时需要考虑数据的嵌套结构。我使用lme4包分析数据以处理嵌套结构，但不知道该如何把它画出来。

## Simulated example data: 600 observations
spin <- runif(n = 600, min = 1, max = 24)
reg <- runif(n = 600, min = 1, max = 15)
# ten participant labels "1".."10"; data.frame() recycles them to 600 rows
ID <- as.character(1:10)
# 30 days x 10 rows = 300 values, recycled to the 600 rows by data.frame()
day <- rep(1:30, each = 10)
testdata <- data.frame(spin, reg, ID, day)
# outcome: scaled spin-by-reg product times noise; the 30 noise draws are
# recycled along the 600 rows
testdata$fatigue <- with(
  testdata,
  spin * reg / 10 * rnorm(30, mean = 3, sd = 2)
)

这里spin和reg是自变量，fatigue（疲劳）是因变量，人（ID）嵌套在天（day）内。我在下面运行我的模型。

## Fit the multilevel model with lme4: fixed effects for spin, reg and their
## interaction, plus a random intercept for each ID
library(lme4)
m1 <- lmer(fatigue ~ spin * reg + ( 1 | ID), data = testdata, REML = T)
(m1)  # auto-prints the fitted model when run at top level
# NOTE(review): `test` does not appear to be a documented argument of lme4's
# confint method for lmer fits -- confirm whether it has any effect here
confint(m1, test = "Chisq")

假设我在spin和reg之间有一个交互。我需要将我的连续变量放入一个分类变量中以绘制它。

所以我根据一个连续变量创建分类变量。在这里我选择spin。注意：不确定下面的代码是否适合我想要的内容。也许需要用到标准误？它也没有考虑我的嵌套数据结构，但我不知道该怎么做。

# Mean and standard deviation of spin, used as the group boundaries
x <- mean(testdata$spin, na.rm = TRUE)
print(x)
y <- sd(testdata$spin, na.rm = TRUE)
print(y)

# Count how many of the two thresholds each spin value exceeds:
#   0 -> "Low"  (spin <= mean - sd)
#   1 -> "Mean" (mean - sd < spin <= mean + sd)
#   2 -> "High" (spin > mean + sd)
testdata$SpinLevel <- c("Low", "Mean", "High")[
  1 + (testdata$spin > x - y) + (testdata$spin > x + y)
]

rm(x, y)

根据我在网上找到的内容，我可以创建一个显示效应的基本图形。但它没有考虑嵌套结构（人——变量ID——嵌套在天内）。

library(ggplot2)
# One fitted straight line (no confidence band) of fatigue on reg
# for each SpinLevel group
ggplot(
  data = testdata,
  mapping = aes(x = reg, y = fatigue, linetype = SpinLevel)
) +
  geom_smooth(method = "lm", se = FALSE)

这个ggplot有助于解释基本效应，但这些线很可能有偏差，因为它没有考虑我的数据的嵌套结构（人嵌套在天内）。

我也可以使用effects包来绘制我的模型，这样会考虑嵌套结构。只是图表不漂亮，而且是按四分位数划分的，很难解释。我希望高、平均、低三条线都画在同一张图上，但我不知道该怎么做。

library(effects)
# Plot the model-based spin-by-reg effect computed from m1
# NOTE(review): xlevels is handed to plot() here rather than to effect() --
# confirm that the spin quartiles are actually used to choose the panels
plot(effect("spin*reg", m1), grid=TRUE, labels = T,
  xlevels=list(spin=quantile(testdata$spin, seq(0, 1, 0.25))))

有什么想法吗？非常感谢。

Answer 1

我稍微更改了模型，使其反映day嵌套在ID中的结构。

这个怎么样：

## Simulated example data
spin <- runif(600, min = 1, max = 24)
reg <- runif(600, min = 1, max = 15)
# labels "1".."10"; shorter than 600, so data.frame() recycles them
ID <- as.character(seq_len(10))
# each day number repeated 10 times (300 values, recycled to 600 rows)
day <- rep(seq_len(30), each = 10)
testdata <- data.frame(spin = spin, reg = reg, ID = ID, day = day)
# fatigue = spin * reg / 10 times noise (30 draws recycled over the rows)
testdata[["fatigue"]] <-
  testdata[["spin"]] * testdata[["reg"]] / 10 * rnorm(30, mean = 3, sd = 2)

## Multilevel model with lme4: fixed effects for spin, reg and their
## interaction; random intercepts for ID and for day within ID
library(lme4)
m1 <- lmer(
  fatigue ~ spin * reg + (1 | ID / day),
  data = testdata,
  REML = TRUE  # spelled out: `T` is an ordinary variable that can be reassigned
)
(m1)
# Confidence intervals with the default method. The former `test = "Chisq"`
# argument is dropped because confint() for lmer fits documents no `test`
# parameter.
confint(m1)

# Mean and standard deviation of spin define the three groups
x <- mean(testdata$spin, na.rm = TRUE)
print(x)
y <- sd(testdata$spin, na.rm = TRUE)
print(y)

# "High" above mean + sd, "Low" at or below mean - sd, "Mean" in between
testdata$SpinLevel <- ifelse(
  testdata$spin > x + y,
  "High",
  ifelse(testdata$spin > x - y, "Mean", "Low")
)

rm(x, y)

# Confidence intervals for the m1 coefficients via multcomp::glht(), drawn as
# point estimates with error bars.
# library() instead of require() so a missing package fails immediately.
library(multcomp)
library(ggplot2)
# the result must be stored as `tmp`, the name used on the following lines
tmp <- as.data.frame(confint(glht(m1))$confint)
tmp$Comparison <- rownames(tmp)
ggplot(tmp, aes(x = Comparison, y = Estimate, ymin = lwr, ymax = upr)) +
  geom_errorbar() +
  geom_point()

此外：

# or
library(multcomp)
# Interval table for m1 from glht(): columns Estimate, lwr and upr are used
tmp <- as.data.frame(confint(glht(m1))[["confint"]])
# keep the coefficient names as a column so they can go on the x axis
tmp[["Comparison"]] <- rownames(tmp)
ggplot(
  data = tmp,
  mapping = aes(x = Comparison, y = Estimate, ymin = lwr, ymax = upr)
) +
  geom_errorbar() +
  geom_point()

在一位名叫Wes的用户的回答中，还有一些非常有趣的彩色图。

Answer 2

数据设置：

set.seed(101)  # fixed seed so the simulated data are reproducible
spin <- runif(n = 600, min = 1, max = 24)
reg <- runif(n = 600, min = 1, max = 15)
# ten labels "1".."10", recycled to 600 rows by data.frame()
ID <- as.character(1:10)
# 300 day values (30 days x 10), recycled to 600 rows by data.frame()
day <- rep(1:30, each = 10)
testdata <- data.frame(spin, reg, ID, day)
# outcome: spin * reg / 10 scaled by noise; the 30 draws are recycled
testdata$fatigue <-
  (testdata$spin * testdata$reg / 10) * rnorm(30, mean = 3, sd = 2)

ID真的嵌套在day中吗？从技术上讲，这意味着在第1天测量的个体1（ID=1）与在第2天测量的ID=1是不同的人……？

library(lme4)
# Random-intercept model: spin, reg and their interaction as fixed effects
m1 <- lmer(
  fatigue ~ spin * reg + (1 | ID),
  data = testdata,
  REML = TRUE
)
# Wald intervals, restricted to the fixed effects by parm = "beta_";
# used instead of test = "Chisq", which doesn't work
confint(m1, parm = "beta_", method = "Wald")
##                    2.5 %    97.5 %
## (Intercept) -13.44726318 7.4959080
## spin         -0.04751327 1.2328254
## reg          -0.86763792 1.1550787
## spin:reg      0.11263238 0.2541709

为什么模型中没有day ......？

设置预测数据：

## spin values to predict at: the 25%, 50% and 75% quantiles of spin
spinvals <- quantile(testdata$spin, probs = seq(0, 1, length.out = 5))[2:4]
## prediction grid: every ID x 51 evenly spaced reg values x each spin value
pframe <- expand.grid(
  ID = unique(testdata$ID),
  reg = seq(min(testdata$reg), max(testdata$reg), length.out = 51),
  spin = spinvals
)
pframe$fatigue <- predict(m1, newdata = pframe)
## explicit levels so the factor follows the order of spinvals rather than
## being alphabetized
pframe$spinFac <- factor(pframe$spin, levels = spinvals)

library(ggplot2)
theme_set(theme_bw())
## one predicted line per ID within each spin value, coloured by spin value
g0 <- ggplot(pframe, aes(x = reg, y = fatigue, colour = spinFac)) +
  geom_line(aes(group = interaction(spinFac, ID)))

 ## bins for cutting testdata into 3 levels (min, 0.33, 0.66, max),
 ## labelled with the values in spinvals
 spincuts <- quantile(testdata$spin, seq(0, 1, length.out = 4))
 ## include.lowest = TRUE keeps the observation equal to min(spin) in the
 ## first bin; with the default right-closed intervals it would become NA
 testdata$spinFac <- cut(testdata$spin, spincuts,
                         labels = spinvals, include.lowest = TRUE)

我不太清楚为什么这会丢掉因子水平……

 ## overlay the observations from testdata on the predicted lines in g0
 g0 +
   geom_point(data = testdata)

此处初步尝试从effects对象中提取所需数据：

library(effects)
# Effect of the spin-by-reg interaction, evaluated at the spin values in
# spinvals
ee <- effect("spin*reg", m1, xlevels = list(spin = spinvals))
# Gather the predictor grid, fitted values and interval bounds from the
# effect object into one data frame for ggplot
eedat <- data.frame(
  ee[["x"]],
  fatigue = ee[["fit"]],
  lwr = ee[["lower"]],
  upr = ee[["upper"]]
)
# One line per spin value with a semi-transparent band between lwr and upr
ggplot(eedat, aes(x = reg, y = fatigue, colour = factor(spin))) +
  geom_line() +
  geom_ribbon(
    aes(group = spin, ymin = lwr, ymax = upr),
    colour = NA,
    alpha = 0.4
  )

绘制两个连续变量与lme4数据的相互作用

2 个答案: