我尝试使用python和numpy编写逻辑回归(使用sklearn.datasets
生成数据集)。代码如下:
import numpy as np
import numpy.linalg as la
def sigmoid(x):
return 1.0/(np.exp(-x) + 1)
def Cost(X,y,w,Lambda):
#return regularised least squares cost function
m = y.size
cost = -sum(y*(np.log(sigmoid(np.dot(X,w)))) + (1-y)*(np.log(1-sigmoid(np.dot(X,w)))))/(m) + Lambda*sum(w[1:].T.dot(w[1:]))/(2*m);
return cost[0]
def gradient(X,y,w,Lambda):
#return gradient vector of cost function
m = y.size
grad = ((X.T).dot(sigmoid(X.dot(w) - y)))/m + Lambda*w/m
grad[0] = grad[0] - Lambda*w[0]/m
return grad
def gd(X,y,w,alpha,num_iters,Lambda):
#perform gradient descent
Cost_history = np.zeros((num_iters,1))
for i in range(num_iters):
Cost_history[i] = Cost(X,y,w,Lambda)
w = w - alpha*gradient(X,y,w,Lambda)
return w, Cost_history
def sgd(X,y,w,alpha,num_iters,Lambda):
pass
def fit(X,y,Lambda=None,alpha=None,num_iters=None):
if Lambda == None:
Lambda = 0;
if alpha == None:
alpha = 0.03
if num_iters == None:
num_iters = 50
m = X.shape[0]
D = X.size//X.shape[0]
X = np.c_[np.ones((m,1)),X]
#w = np.random.rand(D,1)
w = np.zeros((D+1,1))
#w = np.r_[(np.arange(1)+1).reshape(1,1),w]
w, Cost_history = gd(X,y,w,alpha,num_iters,Lambda)
return w, Cost_history
def predict(X,w):
m = X.shape[0]
X = np.c_[np.ones((m,1)), X]
y = X.dot(w)
for i in range(m):
if y[i]>=0:
y[i] = 1
else:
y[i] = 0
return y
我用来生成数据集的脚本是:
from sklearn.datasets import make_blobs
X, y = make_blobs(n_features=2, centers=2)
y = y.reshape(y.size,1)
并且培训由以下人员完成:
w, cost_history = fit(X,y,alpha=0.001,num_iters=1000)
决策边界图如下:
cost_history图看起来像这样(vs epochs):
为什么会这样?因为成本函数似乎是正确的,梯度函数也是如此,我似乎无法弄清楚为什么成本会发散,而用正则化线性回归修改的相同代码工作正常。
情节代码是:
from matplotlib import pyplot as plt
plt.scatter(X[:,0],X[:,1],marker='o',c=y)
plt.plot(linspace(-10,10),-w[0]/w[2] + w[1]*linspace(-10,10)/w[2],'k-')
plt.show()
[基本上我只是一个学习者而且我正试图从头开始创建一个小型ML技术的迷你库来获得一种感觉。我做得对吗?或者我只是使用sklearn的基本工具,然后从那里开始学习更复杂的东西?另外,对于糟糕的结构化问题感到抱歉,因为我只是一个初学者。]