I am using Python 3 and trying to estimate the parameters that maximise the likelihood function of a logistic regression model; unfortunately, my code keeps returning a "Singular matrix" error while executing the Newton-Raphson procedure. The full code is below, with the part corresponding to the Newton-Raphson procedure clearly marked.

I suspect the problem may be that the Hessian stops being positive semi-definite (my code prints "Hessian not positive semi-definite!" whenever the Cholesky decomposition fails).
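To illustrate what I mean, here is a minimal standalone check of the kind that can be run on the computed Hessian just before the solve (a sketch only; diagnose_hessian is an illustrative helper, not part of my model code):

import numpy as np

def diagnose_hessian(H):
    #Symmetrise first to remove floating-point asymmetry
    Hs = 0.5*(H + H.T)
    #Eigenvalues of a symmetric matrix, in ascending order
    eigvals = np.linalg.eigvalsh(Hs)
    print('smallest eigenvalue:', eigvals[0])        #<= 0 means not positive definite
    print('condition number:', np.linalg.cond(Hs))   #huge/inf means numerically singular

My actual code: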
import numpy as np
import matplotlib.pyplot as plt
#Define link function here
def g(z):
    g = 1/(1 + np.exp(-z))
    return g

#For producing y data values given true parameters theta and number of covariates
def logit_data(n, p, theta):
    #Define parameters
    #1) Number of covariates
    p_i = p + 1  #with intercept
    p_i = int(p_i)
    #2) n as correct data type
    n = int(n)
    #3) Specify parameter values to be estimated
    theta = np.reshape(theta, (p_i, 1))
    #4) Define distribution from which covariate values are drawn i.i.d., and initiate data values
    X = np.zeros((n, p_i))
    X[:, 0] = 1  #intercept
    mean = 0
    sigma = 1.5
    X[:, 1:] = np.random.normal(mean, sigma, (n, p))
    #5) Produce y values treating y as a Bernoulli variable with p = g(X*theta)
    r = np.random.uniform(0, 1, n)
    r = np.reshape(r, (len(r), 1))
    htrue = g(X.dot(theta))
    y = htrue - r
    y[y >= 0] = 1
    y[y < 0] = 0
    return X, y
#Newton-Raphson implementation
def NewtonRaphson(X, y):
    ##NOTE: negloglikelihood, gradf and hessian return the negative of the log-likelihood,
    #its gradient and its Hessian respectively, so that the NR method minimises f (rather than maximising l)
    #Define (negative) log-likelihood function to be minimised
    def negloglikelihood(y, h):
        l = y.transpose() @ np.log(h) + (1-y).transpose() @ np.log(1-h)
        f = -l
        return f
    #Define gradient of the (negative) log-likelihood function
    def gradf(y, h, X):
        a = (y-h).transpose()
        gradl = np.matmul(a, X)
        grad_f = -gradl
        return grad_f
    #Define second derivative (Hessian) of the (negative) log-likelihood function
    def hessian(h, X):
        D = np.identity(len(h))*(np.matmul(h, (1-h).transpose()))  #D = diag(h_i*(1-h_i))
        H = -X.transpose() @ D @ X
        Hf = -H
        return Hf
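    #Note: Hf = X^T D X with D = diag(h_i*(1-h_i)); if any fitted probability
    #h_i saturates at (numerically) 0 or 1, the corresponding diagonal entry
    #vanishes and Hf can become singular, which is where I suspect the
    #"Singular matrix" error comes from.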
    #Minimise f=-l
    #Initiate theta and probability parameter h
    thetaEst = np.random.uniform(-5, 10, (p+1, 1))
    eta = np.matmul(X, thetaEst)
    h = g(eta)
    while not (gradf(y, h, X) == 0).all():
        H = hessian(h, X)
        print(H)
        try:
            np.linalg.cholesky(H)
        except np.linalg.LinAlgError:
            print('Hessian not positive semi-definite!')
        h = g(np.matmul(X, thetaEst))
        delta = np.linalg.solve(hessian(h, X), np.reshape(gradf(y, h, X), (6, 1)))
        #Step-halving so that the step does not increase f
        while negloglikelihood(y, h) < negloglikelihood(y, g(np.matmul(X, thetaEst + delta))):
            delta = 0.5*delta
        thetaEst = thetaEst + delta
    return thetaEst
#Main control
#1) Sample numbers to test for errors in theta, as powers of 10.
npowers = np.arange(1, 3, 0.5)
n = np.power(10, npowers)
#2) Number of independent covariates
p = 5
#3) True theta to be estimated (parameter values)
theta = np.asarray([1, 1.2, 1.1, 0.8, 0.9, 1.3])
#4) Initiate arrays to store estimates of theta (and errors) computed at specified sample numbers N
Thetas = np.zeros((len(npowers), p+1))
Errors = np.zeros((len(npowers), p+1))
#5) Obtain random covariate values from the specified distribution, and corresponding Bernoulli y values using the true theta
X, y = logit_data(n[-1], p, theta)
#6) Calculate cumulative means for given n values, for the theta estimates
for ind, N in enumerate(n):
    N = int(N)
    thetaTemp = NewtonRaphson(X[0:N, :], y[0:N])
    Thetas[ind, :] = np.reshape(thetaTemp, 6)
#7) Calculate true errors
Errors = Thetas - theta.transpose()
absError = np.abs(Errors)
nerror = Errors*np.sqrt(n)[:, np.newaxis]
#8) Save data as csv
#9) Plots
fig = plt.figure()
for i in range(p+1):
    plt.plot(npowers, np.log10(absError[:, i]))
fig.suptitle('log10(AbsError) against log10(Number of samples) for error in maximum likelihood estimator in Gaussian model')
plt.xlabel('log_10(Number of sample data points)')
plt.ylabel('log_10(Absolute Error)')
fig.savefig('gaussianerrors.png')
plt.show()
EDIT: My estimates of theta also seem to be wildly off! In the singular-matrix case I now abort the NR function and return the estimate of theta as it stands. The estimates of theta I get are of order 10^230, even though the true values are all of order 1!
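Concretely, the abort is implemented roughly like this inside the NR loop (a sketch of the change, not my exact code):

try:
    delta = np.linalg.solve(hessian(h, X), np.reshape(gradf(y, h, X), (6, 1)))
except np.linalg.LinAlgError:
    #solve() raises LinAlgError("Singular matrix") for a singular Hessian;
    #give up and return whatever estimate has been reached so far
    print('Singular Hessian, aborting')
    return thetaEst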