我正在从头开始编码神经网络,设置了随机权重后,我做了一次正向传播,然后使用均方误差计算了网络的总误差。
现在我应该计算权重的梯度,但是每个权重得到的梯度个数与训练示例的数量一样多,那么我应该使用哪个梯度来更新每个权重?
我考虑过对每个权重的梯度求平均,但这是个好主意吗?
这是我用于计算每个权重的梯度的公式: 该梯度对每个训练示例都会有所不同,这意味着每个权重会得到与训练示例数量一样多的梯度。 我该怎么办?
答案 0 :(得分:0)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import files
# Colab upload prompt; presumably used to bring 'ex1data1.txt' into the
# runtime before it is read below -- confirm against the notebook.
uploaded = files.upload()

# header=None: the file has no header row, so the columns are labelled 0 and 1.
data = pd.read_csv('ex1data1.txt', header=None)
m = len(data)  # number of rows (training examples) in the full dataset
print(m)

# A bare `data.head(5)` / `data.describe()` discards its result unless it is
# the last expression of a notebook cell, so print them explicitly.
print(data.head(5))     # first few rows of the data
print(data.describe())  # summary statistics per column

X = data.iloc[:, 0]  # first column (plotted as population below)
y = data.iloc[:, 1]  # second column (plotted as profit below)

# Scatter plot of the raw data.
plt.scatter(X, y)
plt.xlabel('Population of City in 10,000s')
plt.ylabel('Profit in $10,000s')
plt.show()
# Random hold-out split: each row lands in `train` with probability 0.8,
# the remaining rows form `test`.
msk = np.random.rand(len(data)) < 0.8
train, test = data[msk], data[~msk]

from sklearn import linear_model

# Selecting with a list of labels ([[0]] / [[1]]) yields a one-column
# DataFrame, so both arrays handed to scikit-learn are 2-D.
train_x = np.asanyarray(train[[0]])
train_y = np.asanyarray(train[[1]])
theta = np.zeros([2, 1])

# Reference fit with scikit-learn's ordinary least squares.
regr = linear_model.LinearRegression()
regr.fit(train_x, train_y)
print('Coefficients: ', regr.coef_)
print('Intercept: ', regr.intercept_)
# Re-bind the training inputs/targets as 1-D column selections of the
# training split for the hand-written regression that follows.
train_x, train_y = train.iloc[:, 0], train.iloc[:, 1]

# Leftover shape inspection; as a bare expression it produces no output.
theta.shape

# Start from a scalar parameter of zero: with a scalar theta the model
# evaluated via np.dot(x, theta) is a pure slope with no intercept term.
theta = 0
def computeCost(train_x, train_y, theta):
    """Return the halved mean-squared-error cost of a linear model.

    Computes ``sum((train_x . theta - train_y) ** 2) / (2 * n)`` where ``n``
    is the number of examples in ``train_y``.

    Parameters
    ----------
    train_x : array-like
        Inputs, combined with ``theta`` through ``np.dot``.
    train_y : array-like
        Targets; its length defines the number of examples ``n``.
    theta : scalar or array-like
        Model parameter(s).

    Returns
    -------
    float
        The cost value.

    Raises
    ------
    ValueError
        If ``train_y`` contains no examples.
    """
    # Normalise by the size of the data actually passed in, not by a
    # module-level `m`: the global counts the whole dataset, which gives the
    # wrong scale for a train/test subset.
    n_examples = len(train_y)
    if n_examples == 0:
        raise ValueError("computeCost requires at least one training example")

    residual = np.dot(train_x, theta) - train_y
    return np.sum(np.power(residual, 2)) / (2 * n_examples)
# Hyper-parameters for the gradient-descent run: step size and step count.
alpha = 0.01
iterations = 20

# Cost on the training split with the initial (untrained) theta.
J = computeCost(train_x, train_y, theta)
print(J)
def gradientDescent(X, y, theta, alpha, iterations):
    """Fit ``theta`` by batch gradient descent on the squared-error cost.

    Each step averages the per-example gradients over the whole batch:
    ``theta <- theta - (alpha / n) * X.T . (X . theta - y)``.

    Parameters
    ----------
    X : array-like
        Inputs, combined with ``theta`` through ``np.dot``.
    y : array-like
        Targets; its length defines the number of examples ``n``.
    theta : scalar or array-like
        Initial parameter(s); the caller's object is not modified in place.
    alpha : float
        Learning rate.
    iterations : int
        Number of update steps to perform.

    Returns
    -------
    scalar or ndarray
        The parameter(s) after ``iterations`` updates.

    Raises
    ------
    ValueError
        If ``y`` contains no examples.
    """
    # Average over the examples actually passed in rather than a
    # module-level `m`, so the step size is right for any subset.
    n_examples = len(y)
    if n_examples == 0:
        raise ValueError("gradientDescent requires at least one training example")

    step = alpha / n_examples
    for _ in range(iterations):
        residual = np.dot(X, theta) - y
        # Summed gradient over the batch; `step` turns it into the mean.
        gradient = np.dot(X.T, residual)
        theta = theta - step * gradient
    return theta
# Run gradient descent on the full dataset columns and report the result.
theta = gradientDescent(X, y, theta, alpha, iterations)
print(theta)

# Cost of the fitted parameter on the same data.
J = computeCost(X, y, theta)
print(J)
print(theta)

# Scatter the raw data and overlay the fitted line.
plt.scatter(X, y)
plt.xlabel('Population of City in 10,000s')
plt.ylabel('Profit in $10,000s')
# The line must show the model's prediction X * theta. The previous code
# plotted y * theta and wrapped both arguments in an extra list, which
# matplotlib reads as 1 x n arrays (n single-point series), so no visible
# line was drawn.
plt.plot(X, np.dot(X, theta))
plt.show()