基本思想是(1)生成theta [1-11]的测试数据(2)生成具有正常噪声的y。
import numpy as np
# get a matrix with 1000 * 10
X = np.random.random(size=(1000,10))
# get a theta with [1-11]
true_theta = np.arange(1, X.shape[1] + 2, dtype=float)
# add a column as interception with all ones
X_b = np.hstack([np.ones((len(X), 1)), X])
# set the y with x*theta add a normal noise
y = X_b.dot(true_theta) + np.random.normal(size=len(X))
# define the loss function
def J(theta, X_b, y):
return np.sum((y - X_b.dot(theta)) ** 2) / len(X_b)
return float("inf")
# get the gradient by matrix cal
def dJ(theta, X_b, y):
return X_b.T.dot(X_b.dot(theta) - y) *2. /len(y)
def gradient_descent(dJFunc, X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-4):
theta = initial_theta
cur_iter = 0
while cur_iter < n_iters:
gradient = dJFunc(theta, X_b, y)
last_theta = theta
theta = theta - eta * gradient
#print("iter", cur_iter, " val:" , theta)
if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
cur_iter +=1
return theta
initial_theta = np.zeros(X_b.shape[1])
eta = 0.01
theta = gradient_descent(dJ, X_b, y, initial_theta, eta)
最后theta应该是:1、2、3 ...或接近。 但结果是****但在下面:
[2.78868669 1.84980005 2.56140747 3.72683559 4.73750013 5.62476797
6.60607619 7.72255833 8.47585309 9.67780203 10.54318556]