However, I have some questions about the implementation:
With a learning rate < 0.5 or momentum > 0.9, the network tends to get stuck in a local optimum where the loss is ~1. I assume this is because the step size is not large enough to escape it, but is there a way to overcome this? Or is it inherent to the nature of the data being fitted and unavoidable?
import numpy as np
import matplotlib.pyplot as plt


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sigmoid_derivative(x):
    sig = 1 / (1 + np.exp(-x))
    return sig * (1 - sig)


class NeuralNetwork:
    def __init__(self, x, y):
        self.input = x
        self.weights1 = np.random.rand(self.input.shape[1], 4)
        self.weights2 = np.random.rand(4, 1)
        self.y = y
        self.output = np.zeros(self.y.shape)
        self.v_dw1 = 0
        self.v_dw2 = 0
        self.alpha = 0.5
        self.beta = 0.5

    def feedforward(self):
        self.layer1 = sigmoid(np.dot(self.input, self.weights1))
        self.output = sigmoid(np.dot(self.layer1, self.weights2))

    def backprop(self, alpha, beta):
        # application of the chain rule to find the derivative of the loss
        # function with respect to weights2 and weights1
        d_weights2 = np.dot(self.layer1.T, (2 * (self.y - self.output) * sigmoid_derivative(self.output)))
        d_weights1 = np.dot(self.input.T, (np.dot(2 * (self.y - self.output) *
                                                  sigmoid_derivative(self.output), self.weights2.T) *
                                           sigmoid_derivative(self.layer1)))
        # adding effect of momentum
        self.v_dw1 = (beta * self.v_dw1) + ((1 - beta) * d_weights1)
        self.v_dw2 = (beta * self.v_dw2) + ((1 - beta) * d_weights2)
        # update the weights with the derivative (slope) of the loss function
        self.weights1 = self.weights1 + (self.v_dw1 * alpha)
        self.weights2 = self.weights2 + (self.v_dw2 * alpha)


if __name__ == "__main__":
    X = np.array([[0, 0, 1],
                  [0, 1, 1],
                  [1, 0, 1],
                  [1, 1, 1]])
    y = np.array([[0], [1], [1], [0]])
    nn = NeuralNetwork(X, y)
    total_loss = []
    for i in range(10000):
        nn.feedforward()
        nn.backprop(nn.alpha, nn.beta)
        total_loss.append(sum((nn.y - nn.output) ** 2))
    iteration_num = list(range(10000))
    plt.plot(iteration_num, total_loss)
    plt.show()
    print(nn.output)
Answer 0 (score: 0):
First, in your "sigmoid_derivative(x)", the input to this function is already the output of the sigmoid, but you apply the sigmoid again and then compute the derivative. That is one problem; it should be:
return x * (1 - x)
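To make that concrete, here is a minimal sketch of the corrected derivative, on the assumption that it is only ever called on values that are already sigmoid activations (as self.layer1 and self.output are in your feedforward):

def sigmoid_derivative(x):
    # x is assumed to already be a sigmoid activation a = sigmoid(z),
    # so sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)) reduces to:
    return x * (1 - x)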
Second problem: you are not using any bias. How do you know that your decision boundary will pass through the origin of the problem's hypothesis space? You need to add a bias term.
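As an illustration (not the only way to do it), one simple fix is to append a constant column of ones to the input so that the extra row of weights1 acts as a bias; the X_bias name below is just for this sketch:

import numpy as np

X = np.array([[0, 0, 1],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1]])
# append a column of ones; the corresponding weight row then acts as a bias
X_bias = np.hstack([X, np.ones((X.shape[0], 1))])
# weights1 would then be initialised with X_bias.shape[1] rows instead of 3
weights1 = np.random.rand(X_bias.shape[1], 4)

Alternatively, you can keep explicit bias vectors b1 and b2 and add them in the forward pass.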
One last thing: I think your derivatives are incorrect. You can refer to Andrew Ng's deep learning course on coursera.org, week 1 (week 2), for the list of general formulas used to compute backpropagation in a neural network, to make sure you are doing it right.
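For reference, here is a minimal sketch of one gradient-descent step written in the style of those general formulas, for a 2-layer sigmoid network with mean-squared-error loss and explicit biases. It reuses the sigmoid function defined above; the function name backprop_step and the bias variables b1, b2 are illustrative and not part of your code:

def backprop_step(X, y, W1, b1, W2, b2, alpha):
    # one plain gradient-descent step (momentum omitted for clarity)
    m = X.shape[0]
    # forward pass
    a1 = sigmoid(np.dot(X, W1) + b1)
    a2 = sigmoid(np.dot(a1, W2) + b2)
    # backward pass: chain rule, using a * (1 - a) as the sigmoid derivative
    dz2 = (a2 - y) * a2 * (1 - a2)
    dW2 = np.dot(a1.T, dz2) / m
    db2 = dz2.sum(axis=0, keepdims=True) / m
    dz1 = np.dot(dz2, W2.T) * a1 * (1 - a1)
    dW1 = np.dot(X.T, dz1) / m
    db1 = dz1.sum(axis=0, keepdims=True) / m
    # parameter update
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1
    W2 = W2 - alpha * dW2
    b2 = b2 - alpha * db2
    return W1, b1, W2, b2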