以GLMM自适应(R)计算置信区间和预测区间

时间:2020-05-13 18:45:41

标签: r

我现在想如何计算此数据的置信区间和预测区间。我想我已经设法完成了第一个,但是我无法在预测间隔内弄清楚。我已经遵循了这个GLMM支持的示例(https://drizopoulos.github.io/GLMMadaptive/articles/Dynamic_Predictions.html#prediction-intervals),但不幸的是没有运气。我不确定我是否正确计算了CI,这就是为什么我也将它们包括在内的原因。

为glmm创建数据框:

library(GLMMadaptive)

vis <- c(60, 14, 22, 22, 13, 13, 7, 15, 17, 14, 14, 163, 17, 19, 87, 87, 58, 40, 73, 4, 203, 34, 34,  11,  38, 127,
  127, 5, 90, 13, 3, 3, 5, 117, 8, 8, 39, 39, 2,   2,   4,   1,   9,  29,  29,  36,  15,  15, 165, 165,   2,  10,
  10, 10, 36, 9,  20,  20, 4, 2, 7, 33, 1, 19, 19, 2,   1, 8,   8,  33,   4, 108,   3,  85,  85,  12,  64,  36,
  3, 3, 8, 8, 2, 59, 155, 155, 3, 3, 3, 1, 14, 14, 20, 38, 1,   4,  11,   1,   4,   4,   8,  29,  29,   2,
  2, 15, 3, 25, 6, 9, 9, 9, 1, 267, 2, 2, 18, 2, 45, 16, 16, 14, 18, 7, 101, 101, 1, 3, 57, 18,
  36, 36, 64, 64, 6, 27, 100, 100, 1, 17, 36, 58, 58)

shape <- c("open", "open", "open", "open", "close", "close", "open", "close", "close", "open_close", "open_close", "close",
"open", "open", "close", "close", "open", "close", "close", "open", "open", "close", "close", "open",        
"close", "open_close", "open_close", "open", "close", "open", "close", "close", "close", "close", "open_close", "open_close",          
"open", "open", "close", "close", "open", "open", "close", "open_close", "open_close", "open", "close", "close",
"open_close", "open_close", "open", "close", "close", "open", "close", "close", "close", "close", "open", "open_close", "open", "close",
"open", "open_close", "open_close", "open_close", "open", "close", "close", "open", "open", "close", "close", "open_close", "open_close", "open",
"open", "open", "open", "open", "close", "close", "open", "close", "open_close", "open_close", "open_close", "open_close", "open","open", "open", "open",        
"open", "close", "close", "open_close", "open_close", "open", "open", "open", "open", "close", "close", "open_close",          
"open_close", "open", "open", "open", "open", "close", "close", "open_close", "open_close",          
"open", "open_close", "open", "close", "close", "open", "close", "close", "open_close", "open_close", "open_close", "open",        
"open", "open", "close", "close", "open_close", "open_close", "open", "open", "close", "close", "open", "open", "close", "open_close","open_close",          
"close", "open_close", "open")

Id <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4,
4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9 ,10, 10 ,10, 10 ,10, 10 ,10, 10 ,10, 10 ,11, 11, 11, 11, 11, 11,             
11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13,      
13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15,  15, 15, 15, 15,  
15, 16, 16, 16, 16, 16, 16, 16, 16) 

#Create data frame
d <- data.frame(vis,shape,Id)

#Check distribution
hist(d$vis)

我选择了我正在使用的数据的一个子集,并且它的分布与拟泊松或负二项式相似。通过选择一个子集,我可能改变了它的分布,但是这个问题的目的是另一个问题,而不是拟合优度。


model <- mixed_model(vis ~ shape, random = ~ 1 | Id, data = d,
                   family = negative.binomial())

这是我计算置信区间的方式,我没有执行非常有效的编码,对此我深表歉意。基本上,我已经手动计算了三个变量的平均值,上限值和下限值。不能100%确定在这种情况下是否可以使用qt函数。

#Predict values
preds <- predict(model, newdata = d,
                     type = "subject_specific",
                     se.fit = TRUE, return_newdata = TRUE)

#Function to calculate SEM
std <- function(x) sd(x)/sqrt(length(x))

#Calculate mean
mean <- mean(preds[preds$shape == "close",4])

#Calculate upper value
upper <- mean(preds[preds$shape == "close",4]) + qt(0.975, df=length(preds[preds$shape == "close",4])) * 
  std(preds[preds$shape == "close",4]) 

#Calculate lower value
lower <- mean(preds[preds$shape == "close",4]) - qt(0.975, df=length(preds[preds$shape == "close",4])) * 
  std(preds[preds$shape == "close",4]) 

shape <- "close" 

#Create data frame with mean and CI of predicted values per variable
mean_ci <- data.frame(mean,upper,lower, shape)

mean <- mean(preds[preds$shape == "open",4])

upper <- mean(preds[preds$shape == "open",4]) + qt(0.975, df=length(preds[preds$shape == "open",4])) * 
  std(preds[preds$shape == "open",4]) 

lower <- mean(preds[preds$shape == "open",4]) - qt(0.975, df=length(preds[preds$shape == "open",4])) * 
  std(preds[preds$shape == "open",4]) 

shape <- "open" 
mean_ci_1 <- data.frame(mean,upper,lower,shape)
mean_ci <- rbind(mean_ci, mean_ci_1)



mean <- mean(preds[preds$shape == "open_close",4])

upper <- mean(preds[preds$shape == "open_close",4]) + qt(0.975, df=length(preds[preds$shape == "open_close",4])) * 
  std(preds[preds$shape == "open_close",4]) 

lower <- mean(preds[preds$shape == "open_close",4]) - qt(0.975, df=length(preds[preds$shape == "open_close",4])) * 
  std(preds[preds$shape == "open_close",4]) 


shape <- "open_close" 
mean_ci_1 <- data.frame(mean,upper,lower,shape)
mean_ci <- rbind(mean_ci, mean_ci_1)

df <- merge(z.days,obs.days, by.x="Date", by.y="Date", all.x=TRUE)

preds <- merge(preds,mean_ci, by="shape")

#Plot predicted values and mean predicted value with CI
ggplot(preds,aes(x= shape,y= pred, color=shape)) +theme_ds4psy()+
  geom_point(shape  = 1,size   = 4, alpha=0.6) + geom_point(aes(x= shape,y= mean, color=shape),shape  = 2,size   = 4, alpha=0.6) +
  geom_errorbar(aes(ymin  =  lower,ymax  =  upper),width =  0.2,size =  0.7)


我在没有预测间隔的情况下附加了要查找的图。理想情况下,我可以将两者都包括在内。

在此先感谢您,如果不清楚或需要更多信息,请告诉我。

Predicted values and mean predicted value with CI

0 个答案:

没有答案