我正在尝试在R中编写自己的逻辑回归函数。目标是得到一个与glm()函数(family = binomial)结果相同的函数。我在处理有序分类数据(例如下面的rank变量)时遇到了问题。我曾尝试把该变量转换为因子(factor),就像使用glm()函数时通常所做的那样。但是,我的函数和glm()函数给出的结果不同。有谁知道应该如何处理这类分类数据?在此先感谢您:)您可以在下面找到我的代码。
# Fit a logistic regression by Newton-Raphson (equivalently, IRLS).
#
# Takes four inputs:
#   X         numeric design matrix, one row per observation. The first
#             column is expected to be the intercept column of ones; factor
#             predictors must already be expanded into dummy columns.
#   y         numeric (or logical) 0/1 response, one value per row of X.
#   threshold convergence tolerance on the squared size of the update step.
#   max_iter  maximum number of iterations before giving up.
#
# Returns a named numeric vector with one coefficient per column of X,
# named "(Intercept)", "x1", "x2", ... in column order.
# Stops with "Not converging." when max_iter is exceeded.
manual_logistic_regression <- function(X, y, threshold = 1e-10, max_iter = 100) {
  X <- as.matrix(X)
  stopifnot(
    is.numeric(X),
    ncol(X) >= 1L,
    is.numeric(y) || is.logical(y),
    length(y) == nrow(X)
  )
  y <- as.numeric(y)

  #### setup bit ####
  # initial guess for beta
  beta <- rep(0, ncol(X))
  # start above the threshold so that the loop is entered at least once
  diff <- Inf
  # counter to ensure we're not stuck in an infinite loop
  iter_count <- 0

  #### iterative bit ####
  while (diff > threshold) {
    # fitted probabilities for the current beta; plogis() is the logistic
    # function and stays finite where exp(eta) / (1 + exp(eta)) overflows
    p <- as.vector(plogis(X %*% beta))
    # Bernoulli variances p * (1 - p) are the IRLS weights; scaling the rows
    # of X by them avoids materialising an n x n diagonal matrix
    w <- p * (1 - p)
    # Newton step: solve (X' W X) step = X' (y - p) without forming an inverse
    beta_change <- solve(crossprod(X, w * X), crossprod(X, y - p))
    beta <- beta + as.vector(beta_change)
    # squared length of the step; the loop ends once it drops below threshold
    diff <- sum(beta_change^2)
    # see if we've hit the maximum number of iterations
    iter_count <- iter_count + 1
    if (iter_count > max_iter) {
      stop("Not converging.")
    }
  }

  # name the coefficients for however many columns X actually has
  coef_names <- c("(Intercept)", paste0("x", seq_len(ncol(X) - 1L)))
  setNames(beta, coef_names)
}
# I used the following data sample. I want to regress admit on gre, gpa and rank.
mydata <- read.csv("https://stats.idre.ucla.edu/stat/data/binary.csv")
## view the first few rows of the data
head(mydata)
#
# institutions with a rank of 1 have the highest prestige, while those with a rank of 4 have the lowest.
mydata$rank <- factor(mydata$rank)
mylogit <- glm(admit ~ gre + gpa + rank, data = mydata, family = "binomial")
summary(mylogit)
rank <- factor(mydata$rank)
rank
class(rank) # make sure that it is really a factor
# cbind() strips the factor down to its integer codes 1..4, which fits a
# single linear slope for rank. glm() instead expands the factor into
# treatment-contrast dummy columns (rank2, rank3, rank4), so build the same
# design matrix with model.matrix(); it also supplies the intercept column.
X <- model.matrix(~ gre + gpa + rank, data = mydata)
colnames(X) # column order of the returned coefficients
manual_logistic_regression(X, mydata$admit)
# reference values from glm() for comparison
coef(mylogit)