Question

我写了下面的代码来测试梯度下降，但是没有返回正确的答案：
基本思想是（1）生成theta [1-11]的测试数据（2）生成具有正常噪声的y。

import numpy as np
np.random.seed(666)
# get a matrix with 1000 * 10
X = np.random.random(size=(1000,10))
# get a theta with [1-11]
true_theta = np.arange(1, X.shape[1] + 2, dtype=float)
# add a column as interception with all ones
X_b = np.hstack([np.ones((len(X), 1)), X])

# set the y with x*theta add a normal noise
y = X_b.dot(true_theta) + np.random.normal(size=len(X))

# define the loss function
def J(theta, X_b, y):
    try:
        return np.sum((y - X_b.dot(theta)) ** 2) / len(X_b)
    except:
        return float("inf")

# get the gradient by matrix cal
def dJ(theta, X_b, y):
    return X_b.T.dot(X_b.dot(theta) - y) *2. /len(y)

def gradient_descent(dJFunc, X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-4):
    theta = initial_theta
    cur_iter = 0
    while cur_iter < n_iters:
        gradient = dJFunc(theta, X_b, y)
        last_theta = theta
        theta = theta - eta * gradient
        #print("iter", cur_iter, " val:" , theta)
        if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
            break

        cur_iter +=1
    return theta

initial_theta = np.zeros(X_b.shape[1])
eta = 0.01
theta = gradient_descent(dJ, X_b, y, initial_theta, eta)
print(theta)

最后theta应该是：1、2、3 ...或接近。但结果是****但在下面：

[2.78868669  1.84980005  2.56140747  3.72683559  4.73750013  5.62476797
6.60607619  7.72255833  8.47585309  9.67780203 10.54318556]

有人可以帮我吗？

为什么线性梯度下降在下面的代码中不起作用

0 个答案: