查看每个观察的对数可能性的组成部分

时间:2016-06-19 10:00:28

标签: r regression

我希望使用glm查看数据集中每个观察的对数可能性的组成部分。如果我使用optim,我可以使用下面的逻辑回归示例。

以下是使用glm然后optim分析的示例数据集。

my.data <- read.table(text='
   y  x
   0  1
   0  1
   1  1
   1  1
   1  1
   1  1
   1  1
   1  1
   1  1
   1  1
', header = TRUE)

my.model <- glm(y ~ 1, family=binomial(link='logit'), data = my.data)
summary(my.model)
logLik(my.model)
# 'log Lik.' -5.004024 (df=1)

这是optim代码。我使用print语句来查看对数似然的组件。

y = as.matrix(my.data$y)
x = as.matrix(my.data$x)

n <- dim(y)[1]
logL = rep(NA,n)

logit.negLL = function(param) {
     expit = function(zz) 1/(1+exp(-zz))
     b0 = param[1]
     for (i in 1:n) {
          logL[i] = 0
          psi = expit(b0)
          logL[i] = logL[i] + log( psi * y[i] + (1-psi)*(1 - y[i]))
     }
print(logL)
(-1)*sum(logL)
}

fit = optim(par=c(0.5), fn=logit.negLL, method='Brent', lower = -20, upper = 20)
fit

这一行加上了对数似然的组成部分:

sum(-1.6094379, -1.6094379, -0.2231436, -0.2231436, -0.2231436, -0.2231436, -0.2231436, -0.2231436, -0.2231436, -0.2231436)
#[1] -5.004025

如何从sum

获取此glm语句中的值?

修改

我想也许logLik的源代码可能会提供答案。不幸的是,事实并非如此。它从模型rank - aic/ 2

中获取对数似然的值
methods(logLik)
stats:::logLik.glm

function (object, ...) 
{
    if (!missing(...)) 
        warning("extra arguments discarded")
    fam <- family(object)$family
    p <- object$rank
    if (fam %in% c("gaussian", "Gamma", "inverse.gaussian")) 
        p <- p + 1
    val <- p - object$aic/2
    attr(val, "nobs") <- sum(!is.na(object$residuals))
    attr(val, "df") <- p
    class(val) <- "logLik"
    val
}

p <- my.model$rank

ll <- p - my.model$aic / 2
ll

# [1] -5.004024

修改

我也尝试查看AIC函数,但看起来它正在从logLik获取对数可能性。我不能正确理解这一点。

objects("package:stats", all=TRUE)

stats:::AIC.default

function (object, ..., k = 2) 
{
    ll <- if (isNamespaceLoaded("stats4")) 
        stats4::logLik
    else logLik
    if (!missing(...)) {
        lls <- lapply(list(object, ...), ll)
        vals <- sapply(lls, function(el) {
            no <- attr(el, "nobs")
            c(as.numeric(el), attr(el, "df"), if (is.null(no)) NA_integer_ else no)
        })
        val <- data.frame(df = vals[2L, ], ll = vals[1L, ])
        nos <- na.omit(vals[3L, ])
        if (length(nos) && any(nos != nos[1L])) 
            warning("models are not all fitted to the same number of observations")
        val <- data.frame(df = val$df, AIC = -2 * val$ll + k * 
            val$df)
        Call <- match.call()
        Call$k <- NULL
        row.names(val) <- as.character(Call[-1L])
        val
    }
    else {
        lls <- ll(object)
        -2 * as.numeric(lls) + k * attr(lls, "df")
    }
}
<bytecode: 0x000000000f8a9cd0>
<environment: namespace:stats>

my.AIC <- function (object, ..., k = 2) 
{
    ll <- if (isNamespaceLoaded("stats4")) 
        stats4::logLik
    else logLik
    if (!missing(...)) {
        lls <- lapply(list(object, ...), ll)
        vals <- sapply(lls, function(el) {
            no <- attr(el, "nobs")
            c(as.numeric(el), attr(el, "df"), if (is.null(no)) NA_integer_ else no)
        })
        val <- data.frame(df = vals[2L, ], ll = vals[1L, ])
        nos <- na.omit(vals[3L, ])
        if (length(nos) && any(nos != nos[1L])) 
            warning("models are not all fitted to the same number of observations")
        val <- data.frame(df = val$df, AIC = -2 * val$ll + k * 
            val$df)
        Call <- match.call()
        Call$k <- NULL
        row.names(val) <- as.character(Call[-1L])
        val
    }
    else {
        lls <- ll(object)
        -2 * as.numeric(lls) + k * attr(lls, "df")
    }
}

my.AIC(my.model)
# [1] 12.00805

ll <- if (isNamespaceLoaded("stats4")) stats4::logLik else logLik
lls <- lapply(list(my.model), ll)
vals <- sapply(lls, function(el) {
            no <- attr(el, "nobs")
            c(as.numeric(el), attr(el, "df"), if (is.null(no)) NA_integer_ else no)
})
val <- data.frame(df = vals[2L, ], ll = vals[1L, ])
val
#   df        ll
# 1  1 -5.004024

0 个答案:

没有答案