我试图在多级模型中模拟不相等的样本大小。我有四组,样本大小分别为100,200,300和400。 因此,总样本量为1000. w,u0,u1变量在2级; x,r0在1级.y是结果
nSubWithinGroup <- c(100,200,300,400)###the sample size in each group
nGroup <-4 ## 4 groups
gamma00 <- 1
gamma01 <- 1 ## b0 = gamma00+gamma01*w+u0
gamma10 <- 1 ## b1 = gamma10+gamma11*w+u1
gamma11 <- 1
dataLevel1 <- mat.or.vec(sum(nSubWithinGroup),4)
colnames(dataLevel1) <- c("Group","X","W","Y")
rowIndex <- 0
for (group in 1:nGroup) {
u0 <- rnorm(1,mean=0,sd=1)
u1 <- rnorm(1,mean=0,sd=1)
w <- rnorm(1,mean=0,sd=1)
for(i in 1:length(nSubWithinGroup)){
for (j in 1:nSubWithinGroup[i]){
r0 <- rnorm(1,mean=0,sd=1)
x <- rnorm(1,mean=0,sd=1)
y <- (gamma00+gamma01*w+u0)+(gamma10+gamma11*w+u1)*x+r0
rowIndex <- rowIndex + 1
dataLevel1[rowIndex,] <- c(group,x,w,y)
}
}
}
我运行了代码,它向我展示了&#34; Group&#34;列为1,没有2,3或4.此外,它有错误,即:
&#34;
[<-
中的错误(*tmp*
,rowIndex ,,值= c(2,-1.94476463667851,-0.153516782293473,: 下标超出范围&#34;
答案 0 :(得分:1)
您的原始问题在所有for循环中都很难找到,但您在分组级别上进行了两次循环(一次在1:nGroup
,然后再次在1:length(nSubWithinGroup)
。这导致比你在矩阵中允许的更多组合,以及你的错误。(如果你想检查,运行你的循环而不指定dataLevel1,看看rowIndex最后有什么值。
然而,在R中生成这样的数据可能非常慢,并且您使用n = 1的每个函数都可以很容易地用于生成nTotal数。我已经将代码重写为(希望)更具可读性,但也更具矢量化的东西。
#set seed; you can never reproduce your result if you don't do this
set.seed(289457)
#set constants
gamma00 <- 1
gamma01 <- 1 ## b0 = gamma00+gamma01*w+u0
gamma10 <- 1 ## b1 = gamma10+gamma11*w+u1
gamma11 <- 1
#set size parameters
nSubWithinGroup <- c(100,200,300,400)###the sample size in each group
nGroup <-4
nTotal <- sum(nSubWithinGroup)
#simulate group-level data
level2_data <- data.frame(group=1:nGroup,
size=nSubWithinGroup, #not really necessary here, but I like to have everything documented/accessible
u0 = rnorm(nGroup,mean=0,sd=1),
u1 = rnorm(nGroup,mean=0,sd=1),
w = rnorm(nGroup,mean=0,sd=1)
)
#simulate individual_level data (from example code x and r0 where generated in the same way for each individual)
level1_data <- data.frame(id=1:nTotal,
group=rep(1:nGroup, nSubWithinGroup),
r0 = rnorm(nTotal,mean=0,sd=1),
x = rnorm(nTotal, mean=0,sd=1)
)
#several possibilities here, you can merge the two dataframes together or reference the level2data when calculating the outcome
#merging generates more data, but is also readable
combined_data <- merge(level1_data,level2_data,by="group",all.x=T)
#calculate outcome. This can be shortened for instance by calculating some linear parts before
#merging but wanted to stay as close to original code as possible.
combined_data$y <- (gamma00+gamma01*combined_data$w+combined_data$u0)+
(gamma10+gamma11*combined_data$w+combined_data$u1)*combined_data$x+combined_data$r0