我正在使用LME4构建多变量混合模型(因为我发现更容易理解输出)。但是我无法使其收敛,代码需要永远运行。我首先将18列融为1(特征)。然后,我尝试使用variable1创建一个模型来解释ResponseValue。我的数据框中有大约13个变量,包含14000个观察值,我想一次在模型中包含大约7个(固定效果)变量。即使以最小的代码开始,代码也永远不会结束运行。
我尝试运行默认优化器(无设置)并从模型中删除变量。似乎有帮助,但仍然无济于事。
multiVlm.trait.1stModel<- lmer(ResponseValue ~ trait:(variable1) - 1 +
(trait-1|RandomID),
data=data_melt,
control=lmerControl(optCtrl=list( maxfun=30e3),
optimizer="bobyqa"),verbose=1)
我想让模型收敛,但是当我不设置maxfun时,我得到:
boundary (singular) fit: see ?isSingular
当我将maxfun设置为一个值时,它会告诉我:
maxfun < 10 * length(par)^2 is not recommended.
在两种情况下我都没有任何模型。
所以我猜我要么不了解如何为多变量测试设置固定变量和随机变量,要么不了解如何设置maxfun和其他优化程序参数。还是我的数据框有问题。
这是数据框的一部分:
structure(list(RandomID = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16"), class = "factor"), Day = c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L), Exposed = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = "1", class = "factor"), timestamp = c(2L, 17L,
42L, 67L, 90L, 112L, 129L, 134L, 178L, 199L, 221L, 336L, 352L,
419L, 450L, 456L, 462L, 476L, 481L, 29L, 50L, 72L, 94L, 116L,
138L, 160L, 178L, 324L, 347L, 369L, 391L, 399L, 413L, 418L, 1L,
27L, 85L, 107L, 129L, 150L, 172L, 262L, 289L, 303L, 326L, 371L,
375L, 88L, 111L, 133L), variable1 = c(59.372, 59.977, 60.466,
60.906, 61.698, 62.671, 63.242, 63.332, 64.452, 64.55, 64.673,
66.53, 66.53, 66.53, 66.695, 66.721, 66.723, 66.723, 66.738,
62.521, 63.546, 63.564, 63.753, 63.828, 64.037, 64.069, 64.069,
66.202, 66.307, 66.316, 66.341, 66.891, 67.38, 67.819, 57.51,
61.236, 63.751, 63.875, 63.899, 63.932, 65.81, 67.069, 68.055,
69.573, 69.983, 70.191, 70.191, 58.2, 62.542, 63.238), variable2 = c(36.062,
36.243, 36.272, 36.306, 36.31, 51.454, 53.834, 53.855, 55.264,
55.91, 56.513, 57.074, 57.075, 57.076, 57.359, 57.375, 57.385,
57.388, 57.388, 38.124, 47.252, 47.28, 47.292, 47.304, 47.426,
47.461, 47.461, 49.403, 50.46, 50.483, 50.502, 50.502, 50.502,
50.51, 23.376, 37.496, 44.286, 44.498, 45.09, 45.413, 45.633,
45.633, 45.667, 45.667, 51.884, 53.045, 53.045, 44.952, 50.338,
51.322), variable3 = c(1.5492, 1.7853, 1.8325, 1.9413, 1.9166,
1.2181, 1.2683, 1.4044, 1.2766, 1.0517, 0.9404, 0.86413, 0.86162,
0.86187, 0.88442, 0.89719, 0.89208, 0.8995, 0.89472, 1.9685,
1.6237, 1.6467, 1.6246, 1.7229, 1.7692, 1.7483, 1.7461, 1.7623,
1.73, 1.7317, 1.797, 1.8062, 1.8074, 1.7986, 1.7179, 2.2422,
2.1168, 1.9199, 1.5924, 1.4847, 1.4686, 1.4847, 1.6072, 1.6003,
1.5521, 1.5551, 1.5551, 0.74516, 1.5343, 1.6129), variable5 = c(3.0611,
2.9752, 2.9748, 2.9761, 2.9747, 2.9816, 2.9849, 2.9858, 2.9893,
2.9903, 2.9914, 2.9949, 2.9949, 2.9949, 2.9951, 2.9954, 2.9954,
2.9955, 2.9956, 2.9746, 2.9827, 2.9875, 2.9904, 2.9922, 2.9928,
2.9938, 2.995, 2.9954, 2.9948, 2.9953, 2.9957, 2.9958, 2.9958,
2.9959, 3.2036, 3.0089, 2.9863, 2.9893, 2.9895, 2.9905, 2.9912,
2.9912, 2.9926, 2.9931, 3.0028, 2.9987, 2.9987, 3.1721, 2.9983,
2.9919), variable4 = c(31.727, 35.866, 35.858, 35.593, 35.875,
39.804, 42.086, 42.055, 43.687, 44.678, 45.65, 45.609, 45.609,
45.619, 45.829, 45.933, 46.049, 46.038, 46.018, 39.11, 41.452,
42.212, 42.33, 43.152, 43.593, 44.888, 45.179, 45.408, 45.679,
46.329, 46.587, 46.627, 46.62, 46.624, 32.531, 38.681, 40.181,
41.094, 43.249, 43.769, 44.035, 44.029, 43.693, 44.74, 46.067,
46.611, 46.611, 35.511, 38.619, 40.678), Age = c(44L, 44L, 44L,
44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L,
44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L,
44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L,
44L, 44L, 44L, 44L, 44L, 44L, 44L, 44L), Sex = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("0", "1"), class = "factor"), trait = c("L1_60_f2_6169",
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169",
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169",
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169",
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169",
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169",
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169",
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169",
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169",
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169",
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169",
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169",
"L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169", "L1_60_f2_6169",
"L1_60_f2_6169"), ResponseValue = c(8.1087, 1.2776, 0.70667,
1.5856, 1.3246, 12.45, 3.0172, 1.1216, 20.958, 12.753, 11.239,
3.1055, 8.3986, 2.8289, 3.2187, 3.7314, 5.1144, 1.0522, 1.9444,
0.7096, 5.9223, 5.3697, 3.2527, 5.1819, 4.8512, 8.6721, 3.0177,
8.363, 7.9323, 7.1409, 5.5397, 5.1321, 9.1606, 8.4178, 1.6346,
6.4326, 2.3074, 7.032, 7.6211, 5.0022, 6.5688, 10.239, 1.4024,
5.633, 8.108, 7.1154, 5.8687, 5.8858, 6.1624, 5.9662)), row.names = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L,
43L, 44L, 45L, 46L, 47L, 48L, 52L, 53L, 54L), class = "data.frame")
答案 0 :(得分:0)
如果没有可复制的示例,这将很难(不可能?)进行调试。
当我使用提供的数据子集运行模型时,模型需要约6秒钟的时间来拟合,并运行931次迭代。因此,这无助于弄清楚发生了什么……(如果仅使用此子集运行模型,您会成功吗?)
单数拟合不一定是一个错误:当您尝试拟合大方差-协方差矩阵时,这是一个合理的可能结果(请参见?isSingular
中的详细信息)
一些小技巧:(1)您可以设置verbose=100
以在优化程序运行时获取大量进度信息; (2)在控制设置中使用calc.derivs=FALSE
将进行拟合后的导数计算(对于大型模型,这可能会很慢)。在我的旧Macbook pro上,拟合一组相似的模拟数据花了432次迭代,大约1分钟。相同的模型(没有control
/ verbose
设置)也可以在glmmTMB
中运行-花费了大约两倍的时间。
这是我的模拟数据:
library(lme4)
set.seed(101)
n <- 14000
nt <- 7
dd <- as.data.frame(replicate(nt,rnorm(n)))
names(dd) <- sprintf("trait%d",1:nt)
dd$ID <- 1:n
## melt data to long format, preserve ID
## your "variable" is my "x"
ddm <- tidyr::gather(dd,trait,x,-c(ID))
## parameters (chosen rather arbitrarily)
np <- list(beta=rep(1,nt),
theta=rep(0.5,nt*(nt+1)/2),
sigma=1)
## I'm not quite sure how you're getting away without
## this ...
options(lmerControl=list(check.nobs.vs.nRE="warning"))
ddm$y <- simulate(~trait:x-1 + (trait-1|ID),
newdata=ddm,
family=gaussian,
newparams=list(beta=rep(1,nt),
theta=rep(0.5,nt*(nt+1)/2),
sigma=1))[[1]]
现在适合:
m1 <- lmer(y~trait:x-1 + (trait-1|ID),
data=ddm,
verbose=100,
control=lmerControl(check.nobs.vs.nRE="warning",
calc.derivs=FALSE))
这会暂停几秒钟(在我的旧版Macbook Pro上可能会长达10秒钟),然后开始打印该表格的进度报告
迭代:164 x =(0.851012,0.376852,0.339929,0.364975,0.324903,0.357866,0.375031,0.981579,0.520984,0.558344,0.493708,0.531058,0.572938,1.030928,0.639176,0.651585,0.589269,0.619625,1.003574,0.599344,0.576374,0.540512,1.033954,0.5760954 ,0.585772、0.9999795、0.563064、1.035849)
并产生方差-协方差矩阵的合理的合理估计:
> VarCorr(m1)
Groups Name Std.Dev. Corr
ID traittrait1 0.73486
traittrait2 0.89078 0.379
traittrait3 1.03700 0.300 0.534
traittrait4 1.13949 0.314 0.499 0.642
traittrait5 1.25914 0.271 0.433 0.569 0.702
traittrait6 1.32450 0.260 0.426 0.538 0.653 0.749
traittrait7 1.43165 0.245 0.395 0.506 0.605 0.700 0.782
“真正的”标准开发者:
## lower-triangular matrix, all == 0.5
m <- matrix(0.5,7,7)
m[col(m)>row(m)] <- 0
V <- m %*% t(m)
round(sqrt(diag(V)),3)
## [1] 0.500 0.707 0.866 1.000 1.118 1.225 1.323
True correlations:
round(cov2cor(V),3)
## 0.707
## 0.577 0.816
## 0.500 0.707 0.866
## 0.447 0.632 0.775 0.894
## 0.408 0.577 0.707 0.816 0.913
## 0.378 0.535 0.655 0.756 0.845 0.926
因此相关性看起来有点低(在这种特定的实现中sigma可能很高吗?),但并不疯狂。