I am using Python 3 and trying to estimate the parameters that maximise the likelihood function of a logistic regression model; unfortunately, my code keeps returning a "Singular matrix" error while executing the Newton-Raphson procedure. The full code is below, with the part corresponding to the Newton-Raphson procedure clearly marked.

I suspect the problem may be that the Hessian stops being positive semi-definite (my code prints "Hessian not positive semi-definite!" whenever the Cholesky decomposition fails).
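To illustrate what I mean, here is a minimal standalone check of the kind that can be run on the computed Hessian just before the solve (a sketch only; diagnose_hessian is an illustrative helper, not part of my model code):

import numpy as np

def diagnose_hessian(H):
    #Symmetrise first to remove floating-point asymmetry
    Hs = 0.5*(H + H.T)
    #Eigenvalues of a symmetric matrix, in ascending order
    eigvals = np.linalg.eigvalsh(Hs)
    print('smallest eigenvalue:', eigvals[0])        #<= 0 means not positive definite
    print('condition number:', np.linalg.cond(Hs))   #huge/inf means numerically singular

My actual code: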
import numpy as np
import matplotlib.pyplot as plt
#Define link function here
def g(z):
    g = 1/(1 + np.exp(-z))
    return g

#For producing y data values given true parameters theta and number of covariates
def logit_data(n, p, theta):
    #Define parameters
    #1) Number of covariates
    p_i = p + 1  #with intercept
    p_i = int(p_i)
    #2) n as correct data type
    n = int(n)
    #3) Specify parameter values to be estimated
    theta = np.reshape(theta, (p_i, 1))
    #4) Define distribution from which covariate values are drawn i.i.d., and initiate data values
    X = np.zeros((n, p_i))
    X[:, 0] = 1  #intercept
    mean = 0
    sigma = 1.5
    X[:, 1:] = np.random.normal(mean, sigma, (n, p))
    #5) Produce y values treating y as a Bernoulli variable with p = g(X*theta)
    r = np.random.uniform(0, 1, n)
    r = np.reshape(r, (len(r), 1))
    htrue = g(X.dot(theta))
    y = htrue - r
    y[y >= 0] = 1
    y[y < 0] = 0
    return X, y
#Newton-Raphson implementation
def NewtonRaphson(X, y):
    ##NOTE: negloglikelihood, gradf and hessian return the negative of the log-likelihood,
    #its gradient and its Hessian respectively, so that the NR method minimises f (rather than maximising l)
    #Define (negative) log-likelihood function to be minimised
    def negloglikelihood(y, h):
        l = y.transpose() @ np.log(h) + (1-y).transpose() @ np.log(1-h)
        f = -l
        return f
    #Define gradient of the (negative) log-likelihood function
    def gradf(y, h, X):
        a = (y-h).transpose()
        gradl = np.matmul(a, X)
        grad_f = -gradl
        return grad_f
    #Define second derivative (Hessian) of the (negative) log-likelihood function
    def hessian(h, X):
        D = np.identity(len(h))*(np.matmul(h, (1-h).transpose()))  #D = diag(h_i*(1-h_i))
        H = -X.transpose() @ D @ X
        Hf = -H
        return Hf
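    #Note: Hf = X^T D X with D = diag(h_i*(1-h_i)); if any fitted probability
    #h_i saturates at (numerically) 0 or 1, the corresponding diagonal entry
    #vanishes and Hf can become singular, which is where I suspect the
    #"Singular matrix" error comes from.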
    #Minimise f=-l
    #Initiate theta and probability parameter h
    thetaEst = np.random.uniform(-5, 10, (p+1, 1))
    eta = np.matmul(X, thetaEst)
    h = g(eta)
    while not (gradf(y, h, X) == 0).all():
        H = hessian(h, X)
        print(H)
        try:
            np.linalg.cholesky(H)
        except np.linalg.LinAlgError:
            print('Hessian not positive semi-definite!')
        h = g(np.matmul(X, thetaEst))
        delta = np.linalg.solve(hessian(h, X), np.reshape(gradf(y, h, X), (6, 1)))
        #Step-halving so that the step does not increase f
        while negloglikelihood(y, h) < negloglikelihood(y, g(np.matmul(X, thetaEst + delta))):
            delta = 0.5*delta
        thetaEst = thetaEst + delta
    return thetaEst
#Main control
#1) Sample numbers to test for errors in theta, as powers of 10.
npowers = np.arange(1, 3, 0.5)
n = np.power(10, npowers)
#2) Number of independent covariates
p = 5
#3) True theta to be estimated (parameter values)
theta = np.asarray([1, 1.2, 1.1, 0.8, 0.9, 1.3])
#4) Initiate arrays to store estimates of theta (and errors) computed at specified sample numbers N
Thetas = np.zeros((len(npowers), p+1))
Errors = np.zeros((len(npowers), p+1))
#5) Obtain random covariate values from the specified distribution, and corresponding Bernoulli y values using the true theta
X, y = logit_data(n[-1], p, theta)
#6) Calculate cumulative means for given n values, for the theta estimates
for ind, N in enumerate(n):
    N = int(N)
    thetaTemp = NewtonRaphson(X[0:N, :], y[0:N])
    Thetas[ind, :] = np.reshape(thetaTemp, 6)
#7) Calculate true errors
Errors = Thetas - theta.transpose()
absError = np.abs(Errors)
nerror = Errors*np.sqrt(n)[:, np.newaxis]
#8) Save data as csv
#9) Plots
fig = plt.figure()
for i in range(p+1):
    plt.plot(npowers, np.log10(absError[:, i]))
fig.suptitle('log10(AbsError) against log10(Number of samples) for error in maximum likelihood estimator in Gaussian model')
plt.xlabel('log_10(Number of sample data points)')
plt.ylabel('log_10(Absolute Error)')
fig.savefig('gaussianerrors.png')
plt.show()
EDIT: My estimates of theta also seem to be wildly off! In the singular-matrix case I now abort the NR function and return the estimate of theta as it stands. The estimates of theta I get are of order 10^230, even though the true values are all of order 1!
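Concretely, the abort is implemented roughly like this inside the NR loop (a sketch of the change, not my exact code):

try:
    delta = np.linalg.solve(hessian(h, X), np.reshape(gradf(y, h, X), (6, 1)))
except np.linalg.LinAlgError:
    #solve() raises LinAlgError("Singular matrix") for a singular Hessian;
    #give up and return whatever estimate has been reached so far
    print('Singular Hessian, aborting')
    return thetaEst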