我希望使用glm
查看数据集中每个观察的对数可能性的组成部分。如果我使用optim
,我可以使用下面的逻辑回归示例。
以下是使用glm
然后optim
分析的示例数据集。
my.data <- read.table(text='
y x
0 1
0 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
', header = TRUE)
my.model <- glm(y ~ 1, family=binomial(link='logit'), data = my.data)
summary(my.model)
logLik(my.model)
# 'log Lik.' -5.004024 (df=1)
这是optim
代码。我使用print
语句来查看对数似然的组件。
y = as.matrix(my.data$y)
x = as.matrix(my.data$x)
n <- dim(y)[1]
logL = rep(NA,n)
logit.negLL = function(param) {
expit = function(zz) 1/(1+exp(-zz))
b0 = param[1]
for (i in 1:n) {
logL[i] = 0
psi = expit(b0)
logL[i] = logL[i] + log( psi * y[i] + (1-psi)*(1 - y[i]))
}
print(logL)
(-1)*sum(logL)
}
fit = optim(par=c(0.5), fn=logit.negLL, method='Brent', lower = -20, upper = 20)
fit
这一行加上了对数似然的组成部分:
sum(-1.6094379, -1.6094379, -0.2231436, -0.2231436, -0.2231436, -0.2231436, -0.2231436, -0.2231436, -0.2231436, -0.2231436)
#[1] -5.004025
如何从sum
glm
语句中的值?
修改
我想也许logLik
的源代码可能会提供答案。不幸的是,事实并非如此。它从模型rank - aic/ 2
methods(logLik)
stats:::logLik.glm
function (object, ...)
{
if (!missing(...))
warning("extra arguments discarded")
fam <- family(object)$family
p <- object$rank
if (fam %in% c("gaussian", "Gamma", "inverse.gaussian"))
p <- p + 1
val <- p - object$aic/2
attr(val, "nobs") <- sum(!is.na(object$residuals))
attr(val, "df") <- p
class(val) <- "logLik"
val
}
p <- my.model$rank
ll <- p - my.model$aic / 2
ll
# [1] -5.004024
修改
我也尝试查看AIC
函数,但看起来它正在从logLik
获取对数可能性。我不能正确理解这一点。
objects("package:stats", all=TRUE)
stats:::AIC.default
function (object, ..., k = 2)
{
ll <- if (isNamespaceLoaded("stats4"))
stats4::logLik
else logLik
if (!missing(...)) {
lls <- lapply(list(object, ...), ll)
vals <- sapply(lls, function(el) {
no <- attr(el, "nobs")
c(as.numeric(el), attr(el, "df"), if (is.null(no)) NA_integer_ else no)
})
val <- data.frame(df = vals[2L, ], ll = vals[1L, ])
nos <- na.omit(vals[3L, ])
if (length(nos) && any(nos != nos[1L]))
warning("models are not all fitted to the same number of observations")
val <- data.frame(df = val$df, AIC = -2 * val$ll + k *
val$df)
Call <- match.call()
Call$k <- NULL
row.names(val) <- as.character(Call[-1L])
val
}
else {
lls <- ll(object)
-2 * as.numeric(lls) + k * attr(lls, "df")
}
}
<bytecode: 0x000000000f8a9cd0>
<environment: namespace:stats>
my.AIC <- function (object, ..., k = 2)
{
ll <- if (isNamespaceLoaded("stats4"))
stats4::logLik
else logLik
if (!missing(...)) {
lls <- lapply(list(object, ...), ll)
vals <- sapply(lls, function(el) {
no <- attr(el, "nobs")
c(as.numeric(el), attr(el, "df"), if (is.null(no)) NA_integer_ else no)
})
val <- data.frame(df = vals[2L, ], ll = vals[1L, ])
nos <- na.omit(vals[3L, ])
if (length(nos) && any(nos != nos[1L]))
warning("models are not all fitted to the same number of observations")
val <- data.frame(df = val$df, AIC = -2 * val$ll + k *
val$df)
Call <- match.call()
Call$k <- NULL
row.names(val) <- as.character(Call[-1L])
val
}
else {
lls <- ll(object)
-2 * as.numeric(lls) + k * attr(lls, "df")
}
}
my.AIC(my.model)
# [1] 12.00805
ll <- if (isNamespaceLoaded("stats4")) stats4::logLik else logLik
lls <- lapply(list(my.model), ll)
vals <- sapply(lls, function(el) {
no <- attr(el, "nobs")
c(as.numeric(el), attr(el, "df"), if (is.null(no)) NA_integer_ else no)
})
val <- data.frame(df = vals[2L, ], ll = vals[1L, ])
val
# df ll
# 1 1 -5.004024