I tried to implement the regularized version of logistic regression in Python, following a structure similar to the Octave exercise, except that I also wrote the gradient descent myself. The problem is that the cost value tends to increase instead of decreasing to some value, and I am confused as to why. Any suggestions would be appreciated.
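For reference, these are the regularized cost and gradient I am trying to reproduce (the standard forms from the exercise, with the intercept term $\theta_0$ excluded from the penalty):

$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\Big[-y^{(i)}\log h_\theta(x^{(i)}) - (1-y^{(i)})\log\big(1-h_\theta(x^{(i)})\big)\Big] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2$$

$$\frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)\,x_j^{(i)} + \frac{\lambda}{m}\theta_j \qquad (j \ge 1)$$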
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
dataset = pd.read_csv('G:\Coding\python\Machine Learning\Andrew_Python\Logistic Regression\ex2data2.txt',header=None)
data = dataset.values
[m,n] = dataset.shape
X = np.append( np.ones((m,1)) , data[:,:n-1].reshape(m,n-1), axis=1)
y = data[:,n-1].reshape(m,1)
y1 = np.asarray(np.where(y==1)[0])
y0 = np.asarray(np.where(y==0)[0])
plt.plot(X[y1,1],X[y1,2],'bo',X[y0,1],X[y0,2],'ro')
plt.xlabel('Chip 1')
plt.ylabel('Chip 2')
plt.legend(['Approved','Not Approved'])
plt.show()
def mapFeature(X):
    out = np.ones([m,1])
    X1 = X[:,1].reshape(m,1)
    X2 = X[:,2].reshape(m,1)
    for i in range(1,7):
        for j in range(0,i+1):
            out = np.append(out, np.multiply(np.power(X1,i-j),np.power(X2,j)), axis=1)
    return out
X_reg = np.matrix(mapFeature(X))
m,n = np.shape(X_reg)
theta = np.zeros([n,1])
print(X_reg.shape)
print(y.shape)
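# The two prints above should show (m, 28) and (m, 1): a degree-6 expansion of
# two features yields 1 + (2 + 3 + 4 + 5 + 6 + 7) = 28 columns.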
lmda =1
def sigmoid(theta,X):
    h = np.dot(X,theta)
    return 1/ (1 + np.exp(-h))
def computeCost(theta,X,y,lmda = 0.):
    H = sigmoid( theta, X)
    term1 = np.dot( -y.T , np.log(H))
    term2 = np.dot( 1-y.T , np.log(1-H))
    reg = np.dot(theta[1:].T, theta[1:])*lmda/2
    J = np.sum(term1 - term2 + reg)/m
    return J
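# Added sanity check: with theta still all zeros, h = 0.5 for every example,
# so the cost below should be -log(0.5) ≈ 0.693 regardless of the data.
print(computeCost(theta, X_reg, y))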
alpha = 1
def gradientDescent(X, y, theta, m, lmda, iter, alpha):
    J = []
    for i in range(1,400):
        H = sigmoid(theta, X)
        theta_reg = theta
        theta_reg[0,0]=0
        theta = theta - np.dot( np.transpose(X), H - y )*alpha/m + ( theta_reg )*(alpha*lmda)/m
        J.append( computeCost(theta, X,y, lmda) )
    return theta,J
theta,J = gradientDescent(X_reg,y,theta,m,lmda,iter,alpha)
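This is how I look at the behaviour described above, simply by plotting the cost history J returned by gradientDescent (it keeps going up instead of levelling off):

plt.plot(J)
plt.xlabel('Iteration')
plt.ylabel('Cost J')
plt.show()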
Link to the data used (not mine): https://github.com/SaveTheRbtz/ml-class/blob/master/ex2/ex2data2.txt
Edit: I happened to solve the problem by getting rid of theta_reg and replacing it with

theta0 = np.dot(X.transpose() , (H-y))[0]
theta1 = np.dot(X.transpose() , (H-y))[1:] + lmda*theta[1:]
grad = np.append( theta0 , theta1 , axis=0)
theta = theta - (grad)*alpha/m

which gives the expected answer, but I still don't know why theta_reg did not work.