我正在努力从持有一些变量的lmList对象生成预测,但是在使用完整的笛卡尔数据产品和仅使用几行之间发现了不一致的预测。如果我使用lmList的 lme4 或 nlme 版本,那似乎并不重要。
library(data.table)
library(boot)
library(lme4)
set.seed(31)
# SET PRIMARY ATTRIBUTES
x2 <- c(-1.5800,-1.0660,-0.7640,-0.4970,-0.2395,0.0250,0.3140,0.6535,1.1350,2.0750)
x1 <- c(-1, 0, 1)
grp <- factor(letters[1:5])
# SET MEANS FOR EACH GROUP
y_grp_mn <- runif(5, min = 0.3, max = 0.6)
# EXPAND GRID
dt <- expand.grid(x1, x2, grp)
names(dt) <- c("x1", "x2", "grp")
# EXPAND DT
dt <- rbind(dt, dt, dt, dt, dt)
# SET BINARY OUTCOME WITH y_grp_mn PROBABILITIES
dt$y <- as.numeric(lapply(dt$grp, function(x) {
ifelse(rnorm(1, mean = y_grp_mn[as.numeric(x)]) > 0.5, 1, 0 )
}))
# CONVERT TO DATA TABLE
dt <- data.table(dt)
# GENERATE A LINEAR MODEL AT EACH LEVEL GRP
glm_list <- lmList(y ~ x1 + x2 | grp,
family = binomial(link = "logit"),
data = dt)
# CREATE A SIMPLE TEST DATASET FROM MEANS AT EACH LEVEL OF X1
# GET MEANS AT EACH LEVEL OF GRP x X2
grp_means <- dt[, .(mn = mean(y)), by = .(grp, x2)]
# REPLICATE AT X1
pred_data <- rbind(data.table(grp_means, x1 = -1),
data.table(grp_means, x1 = 0),
data.table(grp_means, x1 = 1))
# GENERATE PREDICTIONS FOR TEST DATA
pred_data[ , preds := inv.logit(predict(glm_list, as.data.frame(pred_data)))]
# EXAMINE PREDICTIONS AT LEVELS OF X1 HOLDING GRP AND X2 CONSTANT
pred_data[c(50,100,150),]
# grp x2 mn x1 preds
# 1: e 2.075 0.6 -1 0.4689207
# 2: e 2.075 0.6 0 0.3400327
# 3: e 2.075 0.6 1 0.6517386
# TRY TO REPLICATE PREDICTED VALUES ON SAME SUBSET OF FULL DATA
inv.logit(predict(glm_list, pred_data[c(50,100,150),]))
# e e e
# 0.4725348 0.5642330 0.6517386
如图所示,只复制了子集的第三行。我有什么遗失的吗?
感谢。