从lme拟合方差结构中提取预测带

时间:2016-11-12 02:34:31

标签: r prediction confidence-interval mixed-models

我知道关于从lme适合获得置信度/预测间隔的讨论,如Extract prediction band from lme fithttp://glmm.wikidot.com/faq中所述。

但是,我有一个包含随机效应和方差结构的模型,但通过上述程序获得的置信/预测区间似乎没有考虑方差结构。

上述程序是否考虑了方差结构的权重,或者我们必须手动考虑它?

例如,请参阅下面的代码

# generating random data
set.seed(20161112)

repetition_per_cell = 5
ncells_per_age = 75

age = c(rep(1, ncells_per_age*repetition_per_cell), rep(2, ncells_per_age*repetition_per_cell), rep(3, ncells_per_age*repetition_per_cell)) # group age
angle.raw = runif(ncells_per_age*3)*90 # random angles
length.raw = c( rnorm(n = ncells_per_age, mean = 130, sd = 15), rnorm(n = ncells_per_age, mean = 100, sd = 10), rnorm(n = ncells_per_age, mean = 25, sd = 5)) # length is a characteristic of the cell, hence it doesn't change accordingly to the repetition

# here I generate the random cells and I group the length and angle for each cell
j = 1
k = 1
cell = 1
length = 1
angle = 1
for (i in 1:(ncells_per_age*repetition_per_cell*3)) {
  cell[i] = j
  length[i] = length.raw[j]
  angle[i] = angle.raw[j]
  k = k + 1

  if (k == (repetition_per_cell + 1)){
    k = 1
    j = j +1
  }
}


# effects of the parameters
effect_Age = c(50, 75, 100)
effect_Angle = c(1, 1.2, 0.1)
effect_Length = c(-0.05, -0.001, -0.02)
sd_ages = c(3, 1, 5)
sd_angle = c(0.5, 1.75, 2)
cell_sd = 3

j = 1
effect = 0
for (i in 1:(ncells_per_age*repetition_per_cell*3)){
  if (j == 1){
      random_effect_cell = rnorm(n = 1, mean = 0, sd = cell_sd)
  }
  agecode = age[i]
  effect[i] = effect_Age[agecode] + angle[i]*effect_Angle[agecode] + length[i]*effect_Length[agecode] + random_effect_cell + rnorm(n = 1, mean = 0, sd = sd_ages[agecode]*exp(sd_angle[agecode]*angle[i]/90) )

  j = j + 1
  if (j == (repetition_per_cell + 1)){
    j = 1
  }
}

data_test = data.frame(effect, angle, age, cell, length)
data_test$age = as.factor(age)


plot(data_test$angle, data_test$effect, col = data_test$age)

# running the mixed-effect model
library(nlme)
model.lme = lme(effect ~ age*angle + length, random = ~1|cell, weights = varComb(varExp(form =~ angle|age), varIdent(form =~ 1|age)), data=data_test, method = "REML")
summary(model.lme)



# getting confidence/prediction interval
length.means = c(mean(data_test$length[data_test$age == '1']), mean(data_test$length[data_test$age == '2']), mean(data_test$length[data_test$age == '3']))
new.data = expand.grid(age = c('1', '2', '3'), length = length.means, angle = seq(from = 0, to = 90, by = 0.5))

# now we need to remove the lengths from age 2 and 3 from age '1', and the same for others age groups
# removing what doesn't make sense
new.data = new.data[new.data$age == '1' & new.data$length == length.means[1] | new.data$age == '2' & new.data$length == length.means[2] | new.data$age == '3' & new.data$length == length.means[3], ]

new.data$pred = predict(model.lme, new.data, level = 0)

Designmat <- model.matrix(formula(model.lme)[-2], new.data)
predvar <- diag(Designmat %*% vcov(model.lme) %*% t(Designmat))
new.data$SE <- sqrt(predvar)
new.data$SE2 <- sqrt(predvar + model.lme$sigma^2)


# plotting
plot(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'], type = 'l', col = 1, ylim = c(0,200), ylab='effect', xlab='angle')
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'], col = 2)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'], col = 3)

lines(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'] + 2*new.data$SE[new.data$age == '1'], col = 1, lty = 2)
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'] + 2*new.data$SE[new.data$age == '2'], col = 2, lty = 2)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'] + 2*new.data$SE[new.data$age == '3'], col = 3, lty = 2)

lines(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'] - 2*new.data$SE[new.data$age == '1'], col = 1, lty = 2)
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'] - 2*new.data$SE[new.data$age == '2'], col = 2, lty = 2)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'] - 2*new.data$SE[new.data$age == '3'], col = 3, lty = 2)

lines(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'] + 2*new.data$SE2[new.data$age == '1'], col = 1, lty = 3)
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'] + 2*new.data$SE2[new.data$age == '2'], col = 2, lty = 3)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'] + 2*new.data$SE2[new.data$age == '3'], col = 3, lty = 3)

lines(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'] - 2*new.data$SE2[new.data$age == '1'], col = 1, lty = 3)
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'] - 2*new.data$SE2[new.data$age == '2'], col = 2, lty = 3)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'] - 2*new.data$SE2[new.data$age == '3'], col = 3, lty = 3)

points(data_test$angle, data_test$effect, col = data_test$age)

这是我们得到的输出。

enter image description here

我们可以看到模拟数据的方差随着角度的增加而增加,但我们看不到使用上述方法计算的置信度/预测区间。

所以,我试图通过将预测方差乘以lme预测的方差结构和下面的代码来包括方差变化。

# plotting with supposed correction
# values from summary(model.lme)
jump_age = c(1.0000000, 0.3720868, 1.7763930)
exp_age = c(0.004880414, 0.016409874, 0.022219648)
new.data$SE.b <- sqrt(predvar)*jump_age[as.numeric(new.data$age)]*exp(exp_age[as.numeric(new.data$age)]*new.data$angle)
new.data$SE2.b <- sqrt(predvar + model.lme$sigma^2)*jump_age[as.numeric(new.data$age)]*exp(exp_age[as.numeric(new.data$age)]*new.data$angle)

plot(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'], type = 'l', col = 1, ylim = c(0,200), ylab='effect', xlab='angle')
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'], col = 2)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'], col = 3)

lines(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'] + 2*new.data$SE.b[new.data$age == '1'], col = 1, lty = 2)
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'] + 2*new.data$SE.b[new.data$age == '2'], col = 2, lty = 2)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'] + 2*new.data$SE.b[new.data$age == '3'], col = 3, lty = 2)

lines(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'] - 2*new.data$SE.b[new.data$age == '1'], col = 1, lty = 2)
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'] - 2*new.data$SE.b[new.data$age == '2'], col = 2, lty = 2)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'] - 2*new.data$SE.b[new.data$age == '3'], col = 3, lty = 2)

lines(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'] + 2*new.data$SE2.b[new.data$age == '1'], col = 1, lty = 3)
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'] + 2*new.data$SE2.b[new.data$age == '2'], col = 2, lty = 3)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'] + 2*new.data$SE2.b[new.data$age == '3'], col = 3, lty = 3)

lines(new.data$angle[new.data$age == '1'], new.data$pred[new.data$age == '1'] - 2*new.data$SE2.b[new.data$age == '1'], col = 1, lty = 3)
lines(new.data$angle[new.data$age == '2'], new.data$pred[new.data$age == '2'] - 2*new.data$SE2.b[new.data$age == '2'], col = 2, lty = 3)
lines(new.data$angle[new.data$age == '3'], new.data$pred[new.data$age == '3'] - 2*new.data$SE2.b[new.data$age == '3'], col = 3, lty = 3)

points(data_test$angle, data_test$effect, col = data_test$age)

下面的输出似乎更适合数据,但我不确定这是否只是巧合。这是计算包含方差结构的lme模型的置信度/预测带的正确方法吗?

enter image description here

0 个答案:

没有答案