R中基于模拟的glmer统计功效(power)计算

时间:2019-03-07 23:01:58

标签: model simulation mixed

我正尝试在以下情形中通过模拟来计算统计功效(power)。

  1. 可重现的示例数据如下(后文代码中以 `data` 引用该数据框):

structure(list(id = c("151", "151", "151", "151", "151", "158", "158", "158", "158", "166", "166", "166", "166", "166", "173", "173", "173", "173", "173", "173", "173", "176", "176", "176", "176", "176", "176", "176", "176", "176", "176", "201", "201", "201", "201", "201", "213", "213", "213", "213", "213", "213", "213", "219", "219", "219", "219", "219", "219", "219", "219", "219", "220", "220", "220", "220", "220", "220", "220", "221", "221", "221", "221", "221", "221", "221", "221", "227", "227", "227", "227", "227", "227", "227", "227", "227", "227", "228", "228", "228", "228", "228", "228", "231", "231", "231", "231", "231", "231", "234", "234", "234", "234", "234", "234", "234", "234", "234", "246", "246", "246", "246", "246", "246", "246", "246", "246", "246", "247", "247", "247", "247", "247", "247", "247", "247", "247", "247", "261", "261", "261", "261", "261", "261", "266", "266", "266", "266", "266", "266", "266", "266", "266", "266", "273", "273", "273", "273", "273", "273", "273", "273", "273", "276", "276", "276", "276", "276", "276", "276", "276", "276", "276", "287", "287", "287", "287", "287", "287", "287", "287", "304", "304", "304", "304", "304", "304", "304", "304", "310", "310", "310", "310", "310", "310", "310", "312", "312", "312", "312", "312", "312", "312", "312", "312", "312", "318", "318", "318", "318", "318", "318", "318", "318", "327", "327", "327", "327", "327", "327", "327", "327", "332", "332", "332", "332", "332", "332", "332", "332", "332"), arm = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("C", "I"), class = "factor"), timepoint = structure(c(1L, 1L, 1L, 3L, 3L, 1L, 3L, 2L, 2L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 2L, 2L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 2L, 2L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 2L, 2L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 2L, 2L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 2L), .Label = c("b", "i", "f"), class = "factor"), social = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2 ), deno = c(2L, 1L, 1L, 2L, 3L, 3L, 
1L, 2L, 1L, 3L, 1L, 1L, 6L, 3L, 1L, 1L, 3L, 9L, 3L, 3L, 3L, 2L, 5L, 3L, 2L, 2L, 6L, 3L, 4L, 1L, 3L, 1L, 11L, 1L, 5L, 2L, 2L, 7L, 1L, 1L, 3L, 4L, 6L, 4L, 1L, 6L, 3L, 19L, 3L, 3L, 10L, 12L, 1L, 4L, 1L, 1L, 2L, 11L, 1L, 5L, 1L, 1L, 4L, 9L, 7L, 1L, 6L, 5L, 2L, 9L, 3L, 2L, 6L, 2L, 4L, 2L, 14L, 1L, 2L, 1L, 7L, 1L, 1L, 1L, 1L, 2L, 3L, 1L, 3L, 13L, 5L, 14L, 17L, 8L, 8L, 4L, 6L, 7L, 10L, 2L, 8L, 6L, 4L, 4L, 2L, 1L, 7L, 11L, 9L, 1L, 2L, 1L, 4L, 2L, 1L, 5L, 1L, 6L, 3L, 4L, 1L, 9L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 12L, 1L, 2L, 3L, 1L, 4L, 4L, 1L, 7L, 2L, 1L, 9L, 2L, 1L, 3L, 8L, 3L, 3L, 4L, 3L, 3L, 5L, 4L, 5L, 10L, 2L, 2L, 3L, 2L, 3L, 4L, 2L, 3L, 2L, 6L, 3L, 1L, 7L, 7L, 1L, 4L, 5L, 2L, 4L, 2L, 3L, 6L, 4L, 2L, 4L, 1L, 2L, 4L, 2L, 4L, 8L, 3L, 3L, 2L, 2L, 1L, 4L, 2L, 2L, 1L, 1L, 3L, 1L, 2L, 1L, 8L, 2L, 9L, 4L, 2L, 4L, 2L, 1L, 3L, 1L, 2L), nume = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 7L, 2L, 1L, 6L, 8L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 2L, 3L, 1L, 0L, 3L, 2L, 0L, 3L, 1L, 1L, 4L, 1L, 3L, 0L, 7L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 3L, 2L, 2L, 1L, 0L, 2L, 1L, 5L, 1L, 2L, 1L, 0L, 1L, 1L, 0L, 1L, 6L, 3L, 0L, 0L, 1L, 0L, 1L, 0L, 2L, 0L, 3L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 3L, 1L, 1L, 1L, 0L, 2L, 3L, 0L, 0L, 3L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 2L, 1L, 1L, 0L, 4L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 2L, 3L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L)), class = "data.frame", row.names = c(NA, -211L))

“id”列标识个体,每个个体在不同的“timepoint”(时间点)上有多次“nume”和“deno”的观测。就本问题而言,“social”只是一个协变量,并非关注重点。

  2. 我用混合效应泊松回归(以id为随机效应)对比率 nume/deno 进行了建模,如下所示:

# Mixed-effects Poisson model for the nume/deno rate: fixed effects for arm,
# timepoint and their interaction, plus a random intercept per id. Only the
# "b" and "f" timepoints are used, and log(deno) enters as an offset.
model <- glmer(
  nume ~ arm * timepoint + (1 | id),
  family = 'poisson',
  data = data,
  offset = log(deno),
  subset = timepoint %in% c("b", "f")
)

模型中包含偏移量 log(deno),用于表示每次“nume”观测对应的暴露量;并且只取“b”(基线)和“f”(随访)两个时间点的数据子集,因为您可能已经注意到,并非所有个体都有“i”(中间)时间点。

这似乎很合理。

  3. 现在,我想计算该模型检测交互项效应(例如等于 0.5、1、1.5)的统计功效。我想知道27名个体的样本量是否足够;如果不够,更合适的样本量应该是多少?

我构建了如下模拟数据集:个体数量与真实数据相同,两个试验组(arm)的比例以及 gender、social 的比例也与真实数据相同。我假设每个个体在每个时间点有4次测量,尽管实际数据并非如此。数据集的背景结构(例如哪个个体是男性/女性等)是固定的,不参与随机模拟。

# Design constants for the simulated study.
n.ind       <- 27     # number of individuals
prop.arms   <- 0.444  # share of individuals assigned to arm "C"
prop.gender <- 0.852  # share of individuals with gender "M"
prop.social <- 0.556  # share of individuals with social == 1
howmany.b   <- 4      # measurements per individual at stage "b"
howmany.i   <- 4      # measurements per individual at stage "i" (arm "I" only)
howmany.f   <- 4      # measurements per individual at stage "f"

# Randomly assign two labels to `n` individuals: round(n * prop) of them get
# labels[1], the remainder get labels[2].
random.split <- function(labels, prop, n) {
  n.first <- round(n * prop)
  sample(rep(labels, c(n.first, n - n.first)), n, replace = FALSE)
}

id     <- seq_len(n.ind)
# The original code sampled `n.docs` values here, a name that is never defined;
# every per-individual vector must have length n.ind.
arm    <- random.split(c("C", "I"), prop.arms, n.ind)
gender <- random.split(c("M", "F"), prop.gender, n.ind)
social <- random.split(c(1, 2), prop.social, n.ind)

# Individuals in arm "C" get no stage-"i" measurements; the others get howmany.i.
stage.i <- ifelse(arm == "C", 0, howmany.i)

首先,我建立一个具有个体特征的数据框:

# One row per individual: covariates plus the number of measurements at each
# stage (b, i, f). The original referred to `n.docs` and `doc.data`, which are
# never defined; the objects actually built here are `n.ind` and `ind.data`.
ind.data <- data.frame(id = id, arm = arm, gender = gender, social = social,
                       stage.b = rep(howmany.b, n.ind), stage.i = stage.i,
                       stage.f = rep(howmany.f, n.ind))
stage.cols <- c("stage.b", "stage.i", "stage.f")
# Total number of measurements per individual
ind.data$howmany <- rowSums(ind.data[, stage.cols])

# Expand the individual-level vectors to one element per measurement.
ind    <- rep(seq_len(n.ind), ind.data$howmany)
arm    <- rep(ind.data$arm, ind.data$howmany)
gender <- rep(ind.data$gender, ind.data$howmany)
social <- rep(ind.data$social, ind.data$howmany)
# t() places each individual's (b, i, f) counts in one column, so as.vector()
# reads the counts individual by individual, in step with the repeated
# c("b", "i", "f") labels.
when   <- rep(rep(c("b", "i", "f"), n.ind),
              as.vector(t(as.matrix(ind.data[, stage.cols]))))

然后,我设置模拟数据集中“固定”的部分(例如每个个体所属的试验组 arm、性别、social 以及测量所在的时间点):

# Fixed (non-random) part of the simulated data set: one row per measurement.
data.sim <- data.frame(ind = ind, arm = arm, gender = gender,
                       social = social, when = when)

## denominators are random in reality: draw them from the empirical
## distribution of deno in the real data. The support is taken from the
## frequency table itself so values and probabilities cannot get out of step.
deno.freq <- prop.table(table(data$deno))
data.sim$deno <- sample(as.numeric(names(deno.freq)), nrow(data.sim),
                        prob = as.vector(deno.freq), replace = TRUE)

下面定义模拟函数:

# Simulate one data set from the Poisson mixed model and return the p-value of
# the arm-by-time interaction from the refitted model.
#
# Arguments (all on the log-rate scale):
#   bArm    coefficient for arm "I"
#   bwhen   coefficient for time "f"
#   bint    coefficient for the arm "I" x time "f" interaction
#   b0      intercept
#   Vind    variance of the per-individual random intercept
#   Verror  variance of the observation-level noise term
#
# Reads `data.sim` and `n.ind` from the enclosing environment; `data.sim` is
# only modified as a local copy.
#
# Returns: the "Pr(>|z|)" entry for the "armI:whenf" coefficient.
sim1 <- function(bArm, bwhen, bint, b0, Vind, Verror) {
  n.obs <- nrow(data.sim)

  # random effect on individuals
  D.re <- rnorm(n.ind, 0, sqrt(Vind))
  # residuals
  # NOTE(review): eps adds observation-level overdispersion that the fitted
  # model below has no term for; with Verror > 0 the rejection rate under the
  # null may exceed the nominal level -- confirm this is intended.
  eps <- rnorm(n.obs, 0, sqrt(Verror))

  # Take covariates from data.sim rather than from same-named global vectors.
  is.I <- data.sim$arm == "I"
  is.f <- data.sim$when == "f"

  # Linear predictor; b0 was accepted but silently ignored in the original.
  eta <- b0 + D.re[data.sim$ind] + bArm * is.I + bwhen * is.f +
    bint * is.I * is.f + eps

  # simulation from model
  data.sim$nume <- rpois(n.obs, data.sim$deno * exp(eta))

  # fit the model as done with real data
  fit1 <- glmer(nume ~ arm * when + (1 | ind), data = data.sim,
                subset = when %in% c("b", "f"), offset = log(deno),
                family = 'poisson')

  # check p-val of interaction (looked up by name instead of position)
  coef(summary(fit1))["armI:whenf", "Pr(>|z|)"]
}

现在运行模拟:

# Run the simulation n.simul times with a Tk progress bar and collect the
# interaction p-value from each replicate.
n.simul <- 50
library(tcltk)
pb <- tkProgressBar(max = n.simul)
out1 <- replicate(n.simul, {
  setTkProgressBar(pb, getTkProgressBar(pb) + 1)
  # sim1() names the random-intercept variance `Vind`; the original call
  # passed `Vdoc`, which fails with an "unused argument" error.
  sim1(bArm = 0, bwhen = 0, bint = 0, b0 = 0, Vind = 1, Verror = 1)
})
close(pb)

# Proportion of replicates in which the interaction is significant at 5%
mean(out1 < 0.05)

这段代码改编自一个已有的示例(原文中的“this”链接),我认为它与我的情况非常接近。

据我理解,如果在原假设下(所有系数均为0,误差方差为1)生成数据,那么拒绝率(即功效)应接近0.05,但结果看起来并非如此。

我的问题很简单:我不知道自己做的是否正确,也不知道应该在模拟中添加哪些参数。

我知道,如果我关注的是交互项效应,那么模拟中的 bint 应取上文提到的 0.5、1、1.5,但其他参数该如何设定呢?另外,我能否同时把个体层面协变量的影响也考虑进去?

我知道这是一个很长的问题,但是如果您能对此有所帮助,我将不胜感激。

祝好,EM

0 个答案:

没有答案