我实现的不是神经网络,而是针对线性回归案例的小批量随机梯度下降。运行后我观察不到参数的任何变化,损失也没有随迭代次数下降。我已经对原始代码做了一些调整,但似乎仍然遗漏了什么:
import numpy as np
import matplotlib.pyplot as plt
# Generating data: m samples of n-dimensional standard-normal features,
# a known linear model (theta, theta0), and Gaussian observation noise.
m, n = 10000, 4
x = np.random.multivariate_normal(np.zeros(n), np.eye(n), size=m).T  # (n, m)
z = np.random.normal(loc=0, scale=0.25, size=(1, m))                 # noise
theta = np.array([1, 0.5, 0.25, 0.125]).reshape(n, 1)                # true weights
theta0 = 2                                                           # true intercept
y = theta.T.dot(x) + theta0 * np.ones((1, m)) + z                    # (1, m) targets
# defining the cost function
def compute_cost(x, y, theta, theta0=2):
    """Squared-error cost of a linear model, averaged over 2*m samples.

    Parameters
    ----------
    x : ndarray, shape (n, m)
        Feature matrix, one column per sample.
    y : ndarray, shape (1, m)
        Target values.
    theta : ndarray, shape (n, 1)
        Weight vector.
    theta0 : float, optional
        Intercept.  Defaults to 2, matching the global the original
        version silently read, so existing 3-argument calls behave
        identically.

    Returns
    -------
    float
        sum((y - theta^T x - theta0)^2) / (2 * m).
    """
    # Derive the sample count from the data instead of relying on the
    # global ``m`` so the function works for any batch size.
    m = y.shape[-1]
    residuals = y - theta.T.dot(x) - theta0
    return np.sum(np.power(residuals, 2)) / (2 * m)
# initializations
alp = 1e-4  # learning rate (step size) for the gradient updates
num_iterations = 10000  # number of mini-batch SGD steps to run
# Gradient Step
def gradient_step(theta, x, y, alp, theta0=2, batch_size=10):
    """Perform one mini-batch SGD update of the regression weights.

    Fixes versus the original implementation:

    * All components of ``theta`` are updated simultaneously from the
      residuals of the *current* ``theta``.  The original updated
      ``theta[j]`` inside the per-dimension loop, so later dimensions
      computed residuals against a partially-updated ``theta``.
    * The input ``theta`` is no longer mutated in place; a new array is
      returned (the caller rebinds ``theta`` to the return value anyway).
    * The number of dimensions comes from ``theta`` itself rather than
      the global ``n``; the intercept and batch size are parameters
      whose defaults reproduce the old hard-coded values.

    Parameters
    ----------
    theta : ndarray, shape (n, 1)
        Current weights (left unmodified).
    x : ndarray, shape (n, m)
        Feature matrix, one column per sample.
    y : ndarray, shape (1, m)
        Targets.
    alp : float
        Learning rate.
    theta0 : float, optional
        Fixed intercept (default 2, as before).
    batch_size : int, optional
        Mini-batch size (default 10, as before).

    Returns
    -------
    ndarray, shape (n, 1)
        Updated weights.
    """
    # Draw a random mini-batch of column indices.
    idx = np.random.permutation(x.shape[-1])[:batch_size]
    xb = x[:, idx]  # (n, batch_size)
    yb = y[:, idx]  # (1, batch_size)
    # Residuals of the CURRENT theta over the whole batch at once.
    residuals = yb - theta.T.dot(xb) - theta0  # (1, batch_size)
    # Batch-averaged gradient ascent direction on -cost:
    # d(cost)/d(theta) = -x (y - theta^T x - theta0)^T / batch_size
    grad = xb.dot(residuals.T) / batch_size  # (n, 1)
    return theta + alp * grad
# gradient descent: record the cost BEFORE each update, so cost_vec[i]
# is the cost of the parameters entering iteration i.
cost_vec = []
for _ in range(num_iterations):
    cost_vec.append(compute_cost(x, y, theta))
    theta = gradient_step(theta, x, y, alp)
任何建议将不胜感激。谢谢。