将多级jags模型从宽格式转换为长格式

时间:2017-01-16 00:27:35

标签: r bayesian hierarchical jags runjags

我有一个多级jags模型。我试图将其从宽格式转换为长格式,如下所述:http://jeromyanglim.tumblr.com/post/37361593128/jags-converting-multilevel-model-from-wide-to但是我的模型比示例更复杂,因此我在制作此工作时遇到了一些麻烦。为了说明困难,我做了一个可重复的例子。第一个块创建数据并设置jags参数:

library(ecodist)
library(runjags)
set.seed(10)

##### population n
n <- 250
# num outputs
num.ys <- 10

# Vector binary to indicate which domains have correlation with independent variables
corr.vec <- c(0, 0, 0, 1, 1, 0, 0, 1, 1, 1)
correlation = 0.99

# Function to simulate correlated outcome
sim.fn <- function(i, var1, sw1) {
    if(sw1 ==1){
        temp <- corgen(n , var1, correlation )
        temp <- as.numeric(temp$y * attr(temp$y,'scaled:scale') + attr(temp$y,'scaled:center'))
    } else {
        temp <- rnorm(n, 0, 5)
    }
    return(temp)
}

##### Generate data
df0 <- data.frame(var1=rnorm(n, 15, 2))
df1 <- data.frame(df0, sapply(1:num.ys, function(i) sim.fn(i, df0$var1, corr.vec[i])))

out.names <- paste0("y_", 1:num.ys)
names(df1) <- c("var1", out.names)

### Jags parameters
parameters = c("B1O", "b1", "b1o", "nu", "sd")
adaptSteps = 1000             # Number of steps to "tune" the samplers.
burnInSteps = 10000            # Number of steps to "burn-in" the samplers.
nChains = 2                   # Number of chains to run.
numSavedSteps=1000          # Total number of steps in chains to save.
thinSteps=2                   # Number of steps to "thin" (1=keep every step).
nPerChain = ceiling( ( numSavedSteps * thinSteps ) / nChains ) # Steps per chain.

好的,接下来的部分就是广泛的格式&#39; jags model thats在对象mcmcChain中提供正确的估计值:

modelstring = "
model {
for( i in 1 : nData ) {
    for(np in 1:nVars){
        y[i, np]  ~ dt( mu[i,np], tau, nu)
        mu[i, np] <- b0s[i] + (b1 + b1o[np]) * x1[i]
    }
}

#Random effects
for(i in 1:nData){
    b0s[i] ~ dnorm(0, b0stau)
}

#Outcome level
for (np in 1:nVars){
    b1o[np] ~ dnorm(0, b1otau)
}

##### Priors
#Overarching Level
b1 ~ dnorm(0, 0.0001)

#
b0stau <- pow(b0ssd, -2)
b0ssd  ~ dt(0, 1/625, 1)T(0,)

# tau & nu priors
nuI ~ dunif(0.001,0.5)
nu <- 1/nuI
tau <- pow(sd, -2)
sd ~ dunif(0, 10)

b1otau <- pow(b1osd, -2)
b1osd  ~ dt(0, 1/625, 1)T(0,)
b1dtau <- pow(b1dsd, -2)
b1dsd  ~ dt(0, 1/625, 1)T(0,)

#Transformations
for(np in 1:nVars){
    B1O[np] <- b1 + b1o[np]
}
}
" # close quote for modelstring
writeLines(modelstring,con="model.jags.no_dom.test.txt")

zy <- (df1[, out.names])
sc_ys <- data.frame(lapply(zy, function(x) scale(x)) )

dataList = list( y = as.matrix(sc_ys), x1 = as.numeric(scale(df1$var1,)), 
             nVars = num.ys, nData = nrow(df1))

# Run this model via run.jags
codaSamples <- run.jags(model="model.jags.no_dom.test.txt" , data=dataList , method ="parallel", n.chains=nChains, monitor=parameters,
                    adapt = adaptSteps, burnin = burnInSteps, sample=nPerChain, thin=thinSteps)

mcmcChain <- data.frame(summary( codaSamples ))
mcmcChain

因此,BO输出接近于生成数据的相关性。 接下来是我对#34;长格式的尝试&#34;模型类似于上面链接中的解释。

modelstring = "
model {
for( i in 1 : nData ) {
    y[i]  ~ dt( mu[i] , tau, nu )
    mu[i] <- b0s[i] + (b1 + b1o[idx[i]]) * x1[i]
}

#Random effects
for(i in 1:nData){
    b0s[i] ~ dnorm(0, b0stau)
}

#Outcome level
for (y in 1:nVars){
    b1o[y] ~ dnorm(0, b1otau[y])
}

##### Priors
#Overarching Level
b1 ~ dnorm(0, 0.0001)

b0stau <- pow(b0ssd, -2)
b0ssd  ~ dt(0, 1/625, 1)T(0,)

for (y in 1:nVars){
    b1otau[y] <- pow(b1osd[y], -2)
    b1osd[y]  ~ dt(0, 1/625, 1)T(0,)
}

tau <- pow(sd, -2)
sd ~ dunif(0, 10)
nuI ~ dunif(0.001,0.5)
nu <- 1/nuI

#Transformations
for(j in 1:nVars){
    B1O[j]  <- b1 + b1o[j]
}
}
" # close quote for modelstring
writeLines(modelstring,con="model.jags.no_dom.long.test.txt")

# Restructure data into long format
dataList2 = list( y = unlist(sc_ys), x1 = rep (as.numeric(scale(df1$var1,)),  length(out.names)),
             idx = rep(1:length(out.names), each=nrow(df1)),
             nVars = length(out.names), nData = nrow(df1))

codaSamples2 <- run.jags(model="model.jags.no_dom.long.test.txt" , data=dataList2 , method ="parallel", n.chains=nChains, monitor=parameters,
                     adapt = adaptSteps, burnin = burnInSteps, sample=nPerChain, thin=thinSteps)

mcmcChain2 <- data.frame(summary( codaSamples2 ))
mcmcChain2

因此mcmcChain2的结果与mcmcChain的结果不匹配,但我看不出我出错的地方。有人可以帮忙吗?感谢。

1 个答案:

答案 0 :(得分:1)

您的矩阵df1具有nData * nVars元素,但您的长格式模型仅使用第一个nData元素(即实际上您只使用数据的第一列)。主数据循环的最大值需要调整为等于nData * nVars而不仅仅是nData。

此外,您还需要一个表示原始df1的行号的向量,以便您可以正确地索引随机效果b0s,例如b0s [dfrow [I]]。此外,很难遵循数据规范(例如什么是长度(out.names))所以我不确定你是否已经这样做了,但是x1需要重复nVars次,或者你应该使用与随机效应相同的x1 [dfrow [i]]索引(为了您的模型代码的可读性,最好是后者)。

马特