Neural network converges to the average of the target outputs and does not work with multiple inputs

Asked: 2019-11-09 11:42:51

Tags: python machine-learning neural-network

I am a beginner in ML and am trying to implement a neural network with sigmoid neurons and a quadratic cost function in Python. It works fine for the simple single-input case, but it breaks down on more complex cases that involve multiple inputs, such as XOR: the output just converges to the average of the target values. For example, when I try to train the network on an AND gate, which takes the inputs [[0,0],[0,1],[1,0],[1,1]] with target outputs [0,0,0,1], the output converges to the average, about 0.25. I run into the same problem when training on other tests with multiple inputs. I strongly suspect the problem lies in how the weight and bias gradients are accumulated. Can someone help me find what is wrong with my code?
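
To make concrete the accumulation step I have in mind, here is a minimal standalone sketch (the names deltas and prev_activations are placeholders, not taken from my class) of how I understand the weight gradient for one layer should be summed over the examples and then averaged before the update:

import numpy as np

# hypothetical per-example error signals and previous-layer activations for one layer
deltas = [np.array([0.1]), np.array([-0.2]), np.array([0.05])]
prev_activations = [np.array([0.5, 0.5]), np.array([0.7, 0.3]), np.array([0.2, 0.9])]

# per-example weight gradient is the outer product of delta and the previous activations;
# the batch gradient is the average of these per-example gradients
weight_gradient = np.zeros((1, 2))
for delta, prev in zip(deltas, prev_activations):
    weight_gradient += np.outer(delta, prev)
weight_gradient /= len(deltas)
print(weight_gradient)

My full code is below.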

import numpy as np

class Network:
    def __init__(self, layers):
        self.layers = layers
        # activations for each layer
        self.activations = self.initialize_layers()
        # bias for each layer
        self.bias = self.initialize_layers()
        # weights stored as a matrix for each layer
        self.weights = self.initialize_weights(is_random = False)
        # sum of weighted inputs and bias for each layer
        self.zs = self.initialize_layers()

    def initialize_layers(self):
        return list(map(
            lambda current_layer:
                np.zeros(current_layer),
            self.layers[1:]
        ))

    def initialize_weights(self, is_random):
        weights = []
        for index, current_layer in enumerate(self.layers):
            if index == 0:
                continue
            weights.append(
                np.random.rand(
                    current_layer,
                    self.layers[index - 1]
                ) if is_random
                else np.zeros((current_layer, self.layers[index - 1]))
            )
        return weights

    def forward(self, inputs, layer_number):
        if layer_number >= len(self.activations):
            return
        else:
            if layer_number == 0:
                previous_activations = inputs
            else:
                previous_activations = self.activations[layer_number - 1]
            current_weights = self.weights[layer_number]
            current_bias = self.bias[layer_number]
            z = np.dot(current_weights, np.transpose(previous_activations)) + current_bias
            self.zs[layer_number] = z
            self.activations[layer_number] = sigmoid(z)

        self.forward(inputs, layer_number + 1)

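    # Recursively backpropagate the error from the output layer down to layer 0,
    # filling in and returning the per-example weight and bias gradients.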
    def backward(self, targets, sigmas, weight_gradients, bias_gradients, layer_number):
        if layer_number == len(self.activations) - 1:
            weight_gradients = self.initialize_weights(is_random = False)
            bias_gradients = self.initialize_layers()
            sigmas[-1] = cost_derivative(self.activations[-1], targets) * sigmoidPrime(self.zs[-1])
        elif layer_number < 0:
            return
        else:
            sigmas[layer_number] = (
                np.dot(np.transpose(self.weights[layer_number + 1]), sigmas[layer_number + 1])
                * sigmoidPrime(self.zs[layer_number])
            )

        bias_gradients[layer_number] = sigmas[layer_number]

        weight_gradient = []
        for sigma in sigmas[layer_number]:
            row = []
            for activation in self.activations[layer_number - 1]:
                row.append(sigma * activation)
            weight_gradient.append(row)
        weight_gradients[layer_number] = weight_gradient
        self.backward(targets, sigmas, weight_gradients, bias_gradients, layer_number - 1)
        return (weight_gradients, bias_gradients)

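    # Update the weights and biases with the accumulated gradients, scaled by learning_rate / input_length.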
    def gradient_descent(self, weight_gradients, bias_gradients, learning_rate, input_length):
        def gradient_helper(gradient_tuple):
            current_value = gradient_tuple[0]
            gradient = gradient_tuple[1]
            return current_value - (learning_rate / input_length) * gradient
        def apply_gradients(values, gradients):
            return list(map(
                gradient_helper,
                zip(values, gradients)
            ))
        self.weights = apply_gradients(self.weights, weight_gradients)
        self.bias = apply_gradients(self.bias, bias_gradients)

    def iterate(self, training_data, learning_rate, epochs):
        for i in range(epochs):
            sigmas = self.initialize_layers()
            weight_gradients = self.initialize_weights(is_random = False)
            bias_gradients = self.initialize_layers()
            for row in training_data:
                input_data = row[0]
                target_data = row[1]
                self.forward(input_data, 0)
                (delta_weight_gradients, delta_bias_gradients) = self.backward(target_data, sigmas, None, None, len(self.activations) - 1)
                weight_gradients = [gradient + delta_gradient for (gradient, delta_gradient) in zip(weight_gradients, delta_weight_gradients)]
                bias_gradients = [gradient + delta_gradient for (gradient, delta_gradient) in zip(bias_gradients, delta_bias_gradients)]

            self.gradient_descent(weight_gradients, bias_gradients, learning_rate, len(input_data))

    def predict(self, inputs):
        predictions = []
        for input_data in inputs:
            self.forward(input_data, 0)
            predictions.append(self.activations[-1])
        return predictions

def cost_derivative(activation, y):
    return activation - y

def sigmoid(matrix):
    return 1 / (1 + np.exp(-matrix))

def sigmoidPrime(matrix):
    sig = sigmoid(matrix)
    return sig * (1 - sig)

net = Network([2, 2, 1])
# print(net.activations[-1])

training_data = [
    ([0, 0], [0]),
    ([0, 1], [1]),
    ([1, 0], [1]),
    ([1, 1], [0])
]

net.iterate(training_data, learning_rate = 0.5, epochs = 1000)
print(net.predict(map(lambda data: data[0], training_data)))

0 Answers:

No answers yet