我尝试从头开始使用梯度下降实现逻辑回归。在绘制成本函数与否后进行梯度下降。迭代次数我发现在一定次数的迭代后成本正在增加。 cost vs no. of iterations
但令人惊讶的是,使用从梯度下降获得的 theta 值,我得到了一个非常好的决策边界。 Decision boundary
是不是因为我使用特征归一化值来绘制。请帮忙。 这是代码。
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
data = pd.read_csv("ex2data1.txt",header=None)
data_array = data.to_numpy()
X = np.ones(data_array.shape)
Y = np.zeros([data_array.shape[0],1])
X[:,1:] = data_array[:,0:-1]
Y[:,0] = data_array[:,-1]
def feature_normalize(X):
X_=X[:,1:]
X_-=X_.mean(axis=0)
X_/=X_.std(axis=0)
X[:,1:] = X_
return X
X = feature_normalize(X)
m = X.shape[0]
iterations = 550000
alpha = 0.0001
theta = np.zeros((X.shape[1],1))
J_vals = np.zeros((iterations))
def sigmoid(x):
return 1/(1+np.exp(-x))
def cost_function(X,Y,theta):
return (-1/m)*(np.matmul(np.transpose(Y),(np.log(sigmoid(np.matmul(X,theta)))))+(np.matmul(np.transpose(1-Y),(np.log(sigmoid(np.matmul(X,theta)))))))
def gradient_descent(X,Y,theta,iterations,alpha):
for i in range (iterations):
theta = theta- ((alpha/m)*(np.matmul(np.transpose(X),sigmoid(np.matmul(X,theta))-Y)))
J_vals[i] = cost_function(X,Y,theta)
return theta,J_vals
theta,J_vals = gradient_descent(X,Y,theta,iterations,alpha)
plt.plot(J_vals)
x1 = np.reshape(X[:,1],(100,1))
x2 = np.reshape(X[:,2],(100,1))
arg_0 = np.where(Y == 0)
arg_1 = np.where(Y == 1)
ax1=plt.scatter(x1[arg_0],x2[arg_0],c='yellow',marker='o')
ax2 = plt.scatter(x1[arg_1],x2[arg_1],c='black',marker='+')
x = np.arange(-2,3)
y = (-theta[0]-theta[1]*x)/theta[2]
plt.plot(x,y)
plt.xlabel("Exam 1 score")
plt.ylabel("Exam 2 score")
plt.title("Scatter plot of training data")
plt.legend((ax1,ax2),('not admitted','admitted'),loc='upper right')
这是训练数据 ex2data1.txt
P.S 我已经尝试将学习率降低到 0.00001