我现在想如何计算此数据的置信区间和预测区间。我想我已经设法完成了第一个,但是我无法在预测间隔内弄清楚。我已经遵循了这个GLMM支持的示例(https://drizopoulos.github.io/GLMMadaptive/articles/Dynamic_Predictions.html#prediction-intervals),但不幸的是没有运气。我不确定我是否正确计算了CI,这就是为什么我也将它们包括在内的原因。
为glmm创建数据框:
library(GLMMadaptive)
vis <- c(60, 14, 22, 22, 13, 13, 7, 15, 17, 14, 14, 163, 17, 19, 87, 87, 58, 40, 73, 4, 203, 34, 34, 11, 38, 127,
127, 5, 90, 13, 3, 3, 5, 117, 8, 8, 39, 39, 2, 2, 4, 1, 9, 29, 29, 36, 15, 15, 165, 165, 2, 10,
10, 10, 36, 9, 20, 20, 4, 2, 7, 33, 1, 19, 19, 2, 1, 8, 8, 33, 4, 108, 3, 85, 85, 12, 64, 36,
3, 3, 8, 8, 2, 59, 155, 155, 3, 3, 3, 1, 14, 14, 20, 38, 1, 4, 11, 1, 4, 4, 8, 29, 29, 2,
2, 15, 3, 25, 6, 9, 9, 9, 1, 267, 2, 2, 18, 2, 45, 16, 16, 14, 18, 7, 101, 101, 1, 3, 57, 18,
36, 36, 64, 64, 6, 27, 100, 100, 1, 17, 36, 58, 58)
shape <- c("open", "open", "open", "open", "close", "close", "open", "close", "close", "open_close", "open_close", "close",
"open", "open", "close", "close", "open", "close", "close", "open", "open", "close", "close", "open",
"close", "open_close", "open_close", "open", "close", "open", "close", "close", "close", "close", "open_close", "open_close",
"open", "open", "close", "close", "open", "open", "close", "open_close", "open_close", "open", "close", "close",
"open_close", "open_close", "open", "close", "close", "open", "close", "close", "close", "close", "open", "open_close", "open", "close",
"open", "open_close", "open_close", "open_close", "open", "close", "close", "open", "open", "close", "close", "open_close", "open_close", "open",
"open", "open", "open", "open", "close", "close", "open", "close", "open_close", "open_close", "open_close", "open_close", "open","open", "open", "open",
"open", "close", "close", "open_close", "open_close", "open", "open", "open", "open", "close", "close", "open_close",
"open_close", "open", "open", "open", "open", "close", "close", "open_close", "open_close",
"open", "open_close", "open", "close", "close", "open", "close", "close", "open_close", "open_close", "open_close", "open",
"open", "open", "close", "close", "open_close", "open_close", "open", "open", "close", "close", "open", "open", "close", "open_close","open_close",
"close", "open_close", "open")
Id <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4,
4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9 ,10, 10 ,10, 10 ,10, 10 ,10, 10 ,10, 10 ,11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15,
15, 16, 16, 16, 16, 16, 16, 16, 16)
#Create data frame
d <- data.frame(vis,shape,Id)
#Check distribution
hist(d$vis)
我选择了我正在使用的数据的一个子集,并且它的分布与拟泊松或负二项式相似。通过选择一个子集,我可能改变了它的分布,但是这个问题的目的是另一个问题,而不是拟合优度。
model <- mixed_model(vis ~ shape, random = ~ 1 | Id, data = d,
family = negative.binomial())
这是我计算置信区间的方式,我没有执行非常有效的编码,对此我深表歉意。基本上,我已经手动计算了三个变量的平均值,上限值和下限值。不能100%确定在这种情况下是否可以使用qt函数。
#Predict values
preds <- predict(model, newdata = d,
type = "subject_specific",
se.fit = TRUE, return_newdata = TRUE)
#Function to calculate SEM
std <- function(x) sd(x)/sqrt(length(x))
#Calculate mean
mean <- mean(preds[preds$shape == "close",4])
#Calculate upper value
upper <- mean(preds[preds$shape == "close",4]) + qt(0.975, df=length(preds[preds$shape == "close",4])) *
std(preds[preds$shape == "close",4])
#Calculate lower value
lower <- mean(preds[preds$shape == "close",4]) - qt(0.975, df=length(preds[preds$shape == "close",4])) *
std(preds[preds$shape == "close",4])
shape <- "close"
#Create data frame with mean and CI of predicted values per variable
mean_ci <- data.frame(mean,upper,lower, shape)
mean <- mean(preds[preds$shape == "open",4])
upper <- mean(preds[preds$shape == "open",4]) + qt(0.975, df=length(preds[preds$shape == "open",4])) *
std(preds[preds$shape == "open",4])
lower <- mean(preds[preds$shape == "open",4]) - qt(0.975, df=length(preds[preds$shape == "open",4])) *
std(preds[preds$shape == "open",4])
shape <- "open"
mean_ci_1 <- data.frame(mean,upper,lower,shape)
mean_ci <- rbind(mean_ci, mean_ci_1)
mean <- mean(preds[preds$shape == "open_close",4])
upper <- mean(preds[preds$shape == "open_close",4]) + qt(0.975, df=length(preds[preds$shape == "open_close",4])) *
std(preds[preds$shape == "open_close",4])
lower <- mean(preds[preds$shape == "open_close",4]) - qt(0.975, df=length(preds[preds$shape == "open_close",4])) *
std(preds[preds$shape == "open_close",4])
shape <- "open_close"
mean_ci_1 <- data.frame(mean,upper,lower,shape)
mean_ci <- rbind(mean_ci, mean_ci_1)
df <- merge(z.days,obs.days, by.x="Date", by.y="Date", all.x=TRUE)
preds <- merge(preds,mean_ci, by="shape")
#Plot predicted values and mean predicted value with CI
ggplot(preds,aes(x= shape,y= pred, color=shape)) +theme_ds4psy()+
geom_point(shape = 1,size = 4, alpha=0.6) + geom_point(aes(x= shape,y= mean, color=shape),shape = 2,size = 4, alpha=0.6) +
geom_errorbar(aes(ymin = lower,ymax = upper),width = 0.2,size = 0.7)
我在没有预测间隔的情况下附加了要查找的图。理想情况下,我可以将两者都包括在内。
在此先感谢您,如果不清楚或需要更多信息,请告诉我。