我想为逻辑回归编写自己的绘图函数。 如果我手动输入变量和数据框,则有一个运行的代码。但是,当我尝试编写函数时,出现错误消息。
这是我的有效代码:
logRegEst <- function(x, y, threshold = 1e-10, maxIter = 100)
{
calcPi <- function(x, beta)
{
beta <- as.vector(beta)
return(exp(x %*% beta) / (1 + exp(x %*% beta)))
}
beta <- rep(0, ncol(x)) # initial guess for beta
diff <- 1000
# initial value bigger than threshold so that we can enter our while loop
iterCount = 0
# counter for the iterations to ensure we're not stuck in an infinite loop
while(diff > threshold) # tests for convergence
{
pi <- as.vector(calcPi(x, beta))
# calculate pi by using the current estimate of beta
W <- diag(pi * (1 - pi))
# calculate matrix of weights W as defined in the fisher scooring algorithem
beta_change <- solve(t(x) %*% W %*% x) %*% t(x) %*% (y - pi)
# calculate the change in beta
beta <- beta + beta_change
# new beta
diff <- sum(beta_change^2)
# calculate how much we changed beta by in this iteration
# if this is less than threshold, we'll break the while loop
iterCount <- iterCount + 1
# see if we've hit the maximum number of iterations
if(iterCount > maxIter){
stop("This isn't converging.")
}
# stop if we have hit the maximum number of iterations
}
n <- length(y)
df <- length(y) - ncol(x)
# calculating the degrees of freedom by taking the length of y minus
# the number of x columns
vcov <- solve(t(x) %*% W %*% x)
logLik <- sum(y * log(pi / (1 - pi)) + log(1 - pi))
deviance <- -2 * logLik
AIC <- -2 * logLik + 2 * ncol(x)
rank <- ncol(x)
#h <- diag(W^(1/2)%*%x%*%(t(x)%*%W%*%x)^(-1)%*%t(x)%*%W^(1/2))
#stdresiduals <- est$residuals/(sqrt(1-h))
#stdresiduals <- W%*%solve(crossprod(W,W),t(W))
list(coefficients = beta, vcov = vcov, df = df, deviance = deviance,
AIC = AIC, iter = iterCount - 1, x = x, y = y, n = n, rank = rank)
# returning results
}
logReg <- function(formula, data)
{
if (sum(is.na(data)) > 0) {
print("missing values in data")
} else {
mf <- model.frame(formula = formula, data = data)
# model.frame() returns us a data.frame with the variables needed to use the
# formula.
x <- model.matrix(attr(mf, "terms"), data = mf)
# model.matrix() creates a disign matrix. That means that for example the
#"Sex"-variable is given as a dummy variable with ones and zeros.
y <- as.numeric(model.response(mf)) - 1
# model.response gives us the response variable.
est <- logRegEst(x, y)
# Now we have the starting position to apply our function from above.
est$formula <- formula
est$call <- match.call()
# We add the formular and the call to the list.
nullModel <- logRegEst(x = as.matrix(rep(1, length(y))), y)
est$nullDeviance <- nullModel$deviance
est$nullDf <- nullModel$df
mu <- exp(as.vector(est$x %*% est$coefficients)) /
(1 + exp(as.vector(est$x %*% est$coefficients)))
# computing the fitted values
est$residuals <- (est$y - mu) / sqrt(mu * (1 - mu))
est$mu <- mu
est$x <- x
est$y <- y
est$data <- data
#hat <- (t(mu))^(1/2)%*%x%*%(t(x)%*%mu%*%x)^(-1)%*%t(x)%*%mu^(1/2)
#est$stdresiduals <- est$residuals/(sqrt(1-hat))
VX <- diag(sqrt(mu*(1-mu)))
hat <- H <- VX%*%solve(crossprod(VX,VX),t(VX))
est$stdresiduals <- est$residuals/(sqrt(1-hat))
class(est) <- "logReg"
# defining the class
est
}
}
predict.logReg <- function(object, newdata=NULL, factorName = NULL, ...) {
ranks <- length(unique(object$data[, factorName]))
newdata[, factorName] <- factor(newdata[, factorName], levels = c(1:ranks))
xNames <- delete.response(terms(object$formula))
xMatrix <- model.matrix(xNames, newdata)
(exp(xMatrix %*% object$coefficients))/(1 + (exp(xMatrix %*% object$coefficients)))
}
mydata <- read.csv("https://stats.idre.ucla.edu/stat/data/binary.csv")
mydata2 <- mydata[with(mydata, order(mydata$gre)),]
points <- (predict.logReg(logReg(admit~ mydata2$gre, data = mydata2)))
plot(mydata2$gre, points[,1])
lines(x = mydata2$gre, y = points[,1])
我得到的是这个情节:
当我尝试编写绘图函数时,出现错误:
plot.logReg <- function(object, varName, ...)
{
keep_loop = TRUE
while (keep_loop) {
switch (menu(c("QQ plot for residuals of logistic regression",
"Logistic regression model fit", "exit"),
title = "Which plot?"),
1 == {
qqnorm(object$residuals,
main = "Normal QQ-Plot of Model Residuals",
ylab = "Residuals")
# plot residuals against the theoretical quantils of
# the standard normal distribution and give the plot the titel
# "Normal QQ-Plot of Model Residuals"
qqline(object$residuals)
# add the line through the origin
},
2 == {
mydata2 <- object$data[with(object$data, order(object$data[,"varName"])),]
points <- (predict.logReg(logReg(object$y ~ varName, data = mydata2)))
plot(mydata2[,"varName"], points[,1], xlab = "variable", ylab = "Probabilities")
lines(x = mydata2[,"varName"], y = points[,1])
},
3 == {
keep_loop = FALSE
})
}
}
log <- logReg(admit ~ gre + gpa + rank, mydata)
plot(log, varName = gre)
为什么plot.logReg函数不起作用?非常感谢您的帮助。谢谢! 编辑:第二次尝试:
2 == {
mydata2 <- object$data[with(object$data, order(object$data[,varName])),]
points <- (predict.logReg(logReg(object$y ~ varName, data = mydata2)))
plot(mydata2[,varName], points[,1], xlab = varName, ylab = "Probabilities")
lines(x = mydata2[,varName], y = points[,1])
},
3 == {
keep_loop = FALSE
})
}
}
plot(log, varName = "gre")
第三次尝试:
2 == {
mydata2 <- object$data[with(object$data, order(object$data %>% pull(varName))),]
points <- (predict.logReg(logReg(object$y ~ varName, data = mydata2)))
plot(mydata2 %>% pull(varName), points[,1], xlab = "varName", ylab = "Probabilities")
lines(x = mydata2 %>% pull(varName), y = points[,1])
},
3 == {
keep_loop = FALSE
})
}
}
plot(log, varName = gre)
答案 0 :(得分:0)
@RuiBarrades和@LAP感谢您的回答。我已经尝试过事先更改报价,但是没有用。例如,我尝试过:
plot.logReg <- function(object, varName, ...)
{
keep_loop = TRUE
while (keep_loop) {
switch (menu(c("QQ plot for residuals of logistic regression",
"Logistic regression model fit", "exit"),
title = "Which plot?"),
1 == {
qqnorm(object$residuals,
main = "Normal QQ-Plot of Model Residuals",
ylab = "Residuals")
# plot residuals against the theoretical quantils of
# the standard normal distribution and give the plot the titel
# "Normal QQ-Plot of Model Residuals"
qqline(object$residuals)
# add the line through the origin
},
2 == {
mydata2 <- object$data[with(object$data, order(object$data[,varName])),]
points <- (predict.logReg(logReg(object$y ~ varName, data = mydata2)))
plot(mydata2[,varName], points[,1], xlab = varName, ylab = "Probabilities")
lines(x = mydata2[,varName], y = points[,1])
},
3 == {
keep_loop = FALSE
})
}
}
plot(log, varName = "gre")
如果这样做,我会收到错误消息:找不到对象gre。
您知道我该如何解决吗?再次感谢您的回答。