I implemented gradient descent for linear regression in Python, reading the data from a csv file. Here is the code:
import numpy as np
import scipy.optimize as op
from sklearn import preprocessing
import matplotlib.pyplot as plt
from matplotlib import style

def CostFunc(theta, x, y):
    m, n = x.shape
    theta = theta.reshape((n, 1))
    # squared-error cost: J = (1/2m) * sum((x.theta - y)^2)
    J = 0.5 * np.sum(np.square(x.dot(theta) - y)) / m
    return J

def gradientDescent(X, y, theta, alpha, num_iters):
    m, n = X.shape
    initial_iter = num_iters
    J = np.zeros((num_iters, 1))
    while num_iters > 0:
        # batch update: theta := theta - (alpha/m) * X^T (X.theta - y)
        theta = theta - (alpha * np.sum((X.dot(theta) - y) * X, axis=0) / m).reshape((n, 1))
        J[initial_iter - num_iters][0] = CostFunc(theta, X, y)
        num_iters = num_iters - 1
    return theta, J

data = np.loadtxt(open("ex1data2.txt", "rb"), delimiter=",", skiprows=1)
nr, nc = data.shape
X = data[:, 0:nc - 1]
X = preprocessing.scale(X)        # feature scaling
X = np.insert(X, 0, 1, axis=1)    # add the intercept column
y = data[:, [nc - 1]]
m, n = X.shape
initial_theta = np.zeros((n, 1))
theta, J = gradientDescent(X, y, initial_theta, 0.01, 400)

# plot cost against iteration number
J = J.reshape(400)
count = 0
grp = np.zeros((400, 2))
for value in J:
    grp[count][0] = count + 1
    grp[count][1] = value
    count = count + 1
plt.plot(grp[:, 0], grp[:, 1], label='cost')
plt.legend(loc=4)
plt.xlabel('iter')
plt.ylabel('cost')
plt.show()
print(theta)
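For reference, the cost and the update the code above is meant to implement are the usual squared-error cost and batch gradient-descent rule (X here already includes the intercept column and alpha is the learning rate):

J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( x^{(i)} \theta - y^{(i)} \right)^2,
\qquad
\theta := \theta - \frac{\alpha}{m} X^{\top} (X\theta - y)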
I ran 400 iterations and the cost decreases with each iteration, as shown in my iteration vs cost plot.
Then I ran scipy's built-in optimizer (op.minimize with the TNC method) on the same data, using the following code:

import numpy as np
import scipy.optimize as op
from sklearn import preprocessing

def Sigmoid(z):
    # defined but not used below
    return 1 / (1 + np.exp(-z))

def Gradient(theta, x, y):
    m, n = x.shape
    theta = theta.reshape((n, 1))
    # gradient of the squared-error cost: (1/m) * x^T (x.theta - y)
    grad = np.sum((x.dot(theta) - y) * x, axis=0) / m
    return grad.flatten()

def CostFunc(theta, x, y):
    m, n = x.shape
    theta = theta.reshape((n, 1))
    J = 0.5 * np.sum(np.square(x.dot(theta) - y)) / m
    return J

data = np.loadtxt(open("ex1data2.txt", "rb"), delimiter=",", skiprows=1)
nr, nc = data.shape
X = data[:, 0:nc - 1]
X = preprocessing.scale(X)        # feature scaling
X = np.insert(X, 0, 1, axis=1)    # add the intercept column
y = data[:, [nc - 1]]
m, n = X.shape
initial_theta = np.zeros((n, 1))
Result = op.minimize(fun=CostFunc,
                     x0=initial_theta.flatten(),  # minimize expects a 1-D x0
                     args=(X, y),
                     method='TNC',
                     jac=Gradient)
print("====================")
print(Result)
# optimal_theta = Result.x
The theta values from the first case are:

[[333032.07465084]
 [100130.7408761 ]
 [  3699.66611303]]
From the second run I get the following output:

     fun: 2066502781.7118049
     jac: array([-6.45345806e-11,  7.84261236e-10, -2.42370452e-10])
 message: 'Local minimum reached (|pg| ~= 0)'
    nfev: 27
     nit: 13
  status: 0
 success: True
       x: array([339119.45652174, 110248.92165868, -6226.22670554])
I assume x: array([339119.45652174, 110248.92165868, -6226.22670554]) is the theta vector found by the optimizer.
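To compare the two results I can evaluate the cost at both parameter vectors (a minimal sketch, assuming X, y and CostFunc from the second script are still in scope; theta_gd and theta_opt are just names I use here for the two solutions):

theta_gd  = np.array([333032.07465084, 100130.7408761, 3699.66611303])   # gradient descent result
theta_opt = Result.x                                                     # TNC result

print(CostFunc(theta_gd, X, y))    # cost at the gradient-descent solution
print(CostFunc(theta_opt, X, y))   # cost at the TNC solution (should match fun above)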
Why do I get different theta values in the two cases?