指定" 2l.norm"调用鼠标的方法我偶然发现一个错误消息,只包含1个NA的变量。我意识到这是一个非常小的问题,考虑到这些变量的非常少量的缺失数据。但是,考虑到这些数据结构也会很优雅。
我使用可供所有人使用的数据库ChickWeight数据集重新创建了这种情况。 我非常清楚这个问题也可能是我执行程序时出错的结果,所以如果是这样的话请告诉我。
ChickWeight[1:20, ]
dim(ChickWeight)
sum(is.na(ChickWeight)) #contains no NAs
ChickWeight$weight[12] <- NA # add 1 NA
ChickWeight$constant <- 1 #add a constant
ChickWeight$Chick <- as.numeric(levels(ChickWeight$Chick)[ChickWeight$Chick]) #class variable has to be an integer
ini <- mice(ChickWeight, maxit = 0)
pred <- ini$predictorMatrix
pred["weight", ] <- c(0, 2, -2, 1, 2)
method <- ini$method
method["weight"] <- "2l.norm"
imputation <- mice(ChickWeight, m = 5, maxit = 5, pred = pred, method = method)
最后一个命令导致:
[<-.data.frame
中的错误(*tmp*
,, i,值= c(37.3233463394145,159.862324738397:替换有2行,数据有1
添加一个额外的NA可以解决问题
ChickWeight$weight[13] <- NA # add another NA
imputation <- mice(ChickWeight, m = 5, maxit = 5, pred = pred, method = method)
有谁知道可能导致错误的原因?
答案 0 :(得分:2)
答案是由老鼠包的作者Stef van Buuren通过电子邮件向我提供的。 当前版本的鼠标包(2.22)中的mice.impute.2l.norm函数有一个非常小的遗漏,并将在下一个版本中修复。
正确的代码如下,其中只有“drop = FALSE”在第4行从最后一行添加到rowSums(as.matrix(x [nry,type == 2]:
mice.impute.2l.norm2 <-
function (y, ry, x, type, intercept = TRUE, ...)
{
rwishart <- function(df, p = nrow(SqrtSigma), SqrtSigma = diag(p)) {
Z <- matrix(0, p, p)
diag(Z) <- sqrt(rchisq(p, df:(df - p + 1)))
if (p > 1) {
pseq <- 1:(p - 1)
Z[rep(p * pseq, pseq) + unlist(lapply(pseq, seq))] <- rnorm(p *
(p - 1)/2)
}
crossprod(Z %*% SqrtSigma)
}
force.chol <- function(x, warn = TRUE) {
z <- 0
repeat {
lambda <- 0.1 * z
XT <- x + diag(x = lambda, nrow = nrow(x))
XT <- (XT + t(XT))/2
s <- try(expr = chol(XT), silent = TRUE)
if (class(s) != "try-error")
break
z <- z + 1
}
attr(s, "forced") <- (z > 0)
if (warn && z > 0)
warning("Cholesky decomposition had to be forced",
call. = FALSE)
return(s)
}
if (intercept) {
x <- cbind(1, as.matrix(x))
type <- c(2, type)
}
n.iter <- 100
nry <- !ry
n.class <- length(unique(x[, type == (-2)]))
if (n.class == 0)
stop("No class variable")
gf.full <- factor(x[, type == (-2)], labels = 1:n.class)
gf <- gf.full[ry]
XG <- split.data.frame(as.matrix(x[ry, type == 2]), gf)
X.SS <- lapply(XG, crossprod)
yg <- split(as.vector(y[ry]), gf)
n.g <- tabulate(gf)
n.rc <- ncol(XG[[1]])
bees <- matrix(0, nrow = n.class, ncol = n.rc)
ss <- vector(mode = "numeric", length = n.class)
mu <- rep(0, n.rc)
inv.psi <- diag(1, n.rc, n.rc)
inv.sigma2 <- rep(1, n.class)
sigma2.0 <- 1
theta <- 1
for (iter in 1:n.iter) {
for (class in 1:n.class) {
vv <- sym(inv.sigma2[class] * X.SS[[class]] + inv.psi)
bees.var <- chol2inv(chol(vv))
bees[class, ] <- drop(bees.var %*% (crossprod(inv.sigma2[class] *
XG[[class]], yg[[class]]) + inv.psi %*% mu)) +
drop(rnorm(n = n.rc) %*% chol(sym(bees.var)))
ss[class] <- crossprod(yg[[class]] - XG[[class]] %*%
bees[class, ])
}
mu <- colMeans(bees) + drop(rnorm(n = n.rc) %*% chol(chol2inv(chol(sym(inv.psi)))/n.class))
inv.psi <- rwishart(df = n.class - n.rc - 1, SqrtSigma = chol(chol2inv(chol(sym(crossprod(t(t(bees) -
mu)))))))
inv.sigma2 <- rgamma(n.class, n.g/2 + 1/(2 * theta),
scale = 2 * theta/(ss * theta + sigma2.0))
H <- 1/mean(inv.sigma2)
sigma2.0 <- rgamma(1, n.class/(2 * theta) + 1, scale = 2 *
theta * H/n.class)
G <- exp(mean(log(1/inv.sigma2)))
theta <- 1/rgamma(1, n.class/2 - 1, scale = 2/(n.class *
(sigma2.0/H - log(sigma2.0) + log(G) - 1)))
}
imps <- rnorm(n = sum(nry), sd = sqrt(1/inv.sigma2[gf.full[nry]])) +
rowSums(as.matrix(x[nry, type == 2, drop = FALSE]) * bees[gf.full[nry],
])
return(imps)
}
通过
将新功能添加到鼠标命名空间时environment(mice.impute.2l.norm2) <- asNamespace('mice')
它可以通过调用2l.norm2在小鼠中正常使用。要在前面描述的例子中显示:
method["weight"] <- "2l.norm2"
imputation <- mice(ChickWeight, m = 5, maxit = 5, pred = pred, method = method)
现在按预期工作了!