Question

我试图在python中从头开始编写神经网络。为了检查一切是否正常，我想过度填充网络，但损失似乎首先爆炸，然后回到初始值并停在那里（没有收敛）。我检查了我的代码并找到了原因。我认为我对反向传播的理解或实施是不正确的，但可能还有其他原因。任何人都可以帮助我，或者至少指出我正确的方向吗？

# Initialize weights and biases given dimesnsions (For this example the dimensions are set to [12288, 64, 1])
def initialize_parameters(dims):
    # Initiate parameters
    parameters = {} 
    L = len(dims) # Number of layers in the network

    # Loop over the given dimensions. Initialize random weights and set biases to zero.
    for i in range(1, L):
        parameters["W" + str(i)] = np.random.randn(dims[i], dims[i-1]) * 0.01
        parameters["b" + str(i)] = np.zeros([dims[i], 1])

    return parameters

# Activation Functions
def relu(x, deriv=False):
    if deriv:
        return 1. * (x > 0)
    else:
        return np.maximum(0,x)

def sigmoid(x, deriv=False):
    if deriv:
        return x * (1-x)
    else:
        return 1/(1 + np.exp(-x))


# Forward and backward pass for 2 layer neural network. (1st relu, 2nd sigmoid)
def forward_backward(X, Y, parameters):
    # Array for storing gradients
    grads = {}

    # Get the length of examples
    m = Y.shape[1]

    # First layer
    Z1 = np.dot(parameters["W1"], X) + parameters["b1"]
    A1 = relu(Z1)

    # Second layer
    Z2 = np.dot(parameters["W2"], A1) + parameters["b2"]
    AL = sigmoid(Z2)

    # Compute cost
    cost = (-1 / m) * np.sum(np.multiply(Y, np.log(AL)) + np.multiply(1 - Y, np.log(1 - AL)))

    # Backpropagation
    # Second Layer
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    dZ2 = dAL * sigmoid(AL, deriv=True)
    grads["dW2"] = np.dot(dZ2, A1.T) / m
    grads["db2"] = np.sum(dZ2, axis=1, keepdims=True) / m

    # First layer
    dA1 = np.dot(parameters["W2"].T, dZ2)
    dZ1 = dA1 * relu(A1, deriv=True)
    grads["dW1"] = np.dot(dZ1, X.T)
    grads["db1"] = np.sum(dZ1, axis=1, keepdims=True) / m

    return AL, grads, cost

# Hyperparameters
dims = [12288, 64, 1]
epoches = 2000
learning_rate = 0.1

# Initialize parameters
parameters = initialize_parameters(dims)
log_list = []

# Train the network
for i in range(epoches):
    # Get X and Y
    x = np.array(train[0:10],ndmin=2).T
    y = np.array(labels[0:10], ndmin=2).T

    # Perform forward and backward pass
    AL, grads, cost = forward_backward(x, y, parameters)

    # Compute cost and append to the log_list
    log_list.append(cost)

    # Update parameters with computed gradients 
    parameters = update_parameters(grads, parameters, learning_rate)

plt.plot(log_list)
plt.title("Loss of the network")
plt.show()

Answer 1

我很难找到计算误差梯度的地方，输入训练数据样本也会有帮助...

我不知道这对你有帮助，但我会分享我的Python神经网络解决方案来学习XOR问题。

import numpy as np


def sigmoid_function(x, derivative=False):
    """
    Sigmoid function
    “x” is the input and “y” the output, the nonlinear properties of this function means that
    the rate of change is slower at the extremes and faster in the centre. Put plainly,
    we want the neuron to “make its mind up” instead of indecisively staying in the middle.
    :param x: Float
    :param Derivative: Boolean
    :return: Float
    """
    if (derivative):
        return x * (1 - x)  # Derivative using the chain rule.
    else:
        return 1 / (1 + np.exp(-x))


# create dataset for XOR problem
input_data = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
ideal_output = np.array([[0.0], [1.0], [1.0], [0.0]])


#initialize variables
learning_rate = 0.1
epoch = 50000 #number or iterations basically - One round of forward and back propagation is called an epoch

# get the second element from the numpy array shape field to detect the count of features for input layer
input_layer_neurons = input_data.shape[1]
hidden_layer_neurons = 3 #number of hidden layer neurons
output_layer_neurons = 1 #number of output layer neurons

#init weight & bias
weights_hidden = np.random.uniform(size=(input_layer_neurons, hidden_layer_neurons))
bias_hidden = np.random.uniform(1, hidden_layer_neurons)
weights_output = np.random.uniform(size=(hidden_layer_neurons, output_layer_neurons))
bias_output = np.random.uniform(1, output_layer_neurons)

for i in range(epoch):

    #forward propagation
    hidden_layer_input_temp = np.dot(input_data, weights_hidden) #matrix dot product to adjust for weights in the layer
    hidden_layer_input = hidden_layer_input_temp + bias_hidden #adjust for bias
    hidden_layer_activations = sigmoid_function(hidden_layer_input) #use the activation function
    output_layer_input_temp = np.dot(hidden_layer_activations, weights_output)
    output_layer_input = output_layer_input_temp + bias_output
    output = sigmoid_function(output_layer_input) #final output

    #backpropagation (where adjusting of the weights happens)
    error = ideal_output - output #error gradient
    if (i % 1000 == 0):
        print("Error: {}".format(np.mean(abs(error))))

    #use derivatives to compute slope of output and hidden layers
    slope_output_layer = sigmoid_function(output, derivative=True)
    slope_hidden_layer = sigmoid_function(hidden_layer_activations, derivative=True)

    #calculate deltas
    delta_output = error * slope_output_layer
    error_hidden_layer = delta_output.dot(weights_output.T) #calculates the error at hidden layer
    delta_hidden = error_hidden_layer * slope_hidden_layer

    #change the weights
    weights_output += hidden_layer_activations.T.dot(delta_output) * learning_rate
    bias_output += np.sum(delta_output, axis=0, keepdims=True) * learning_rate
    weights_hidden += input_data.T.dot(delta_hidden) * learning_rate
    bias_hidden += np.sum(delta_hidden, axis=0, keepdims=True) * learning_rate

我的反向传播有什么问题？

1 个答案: