
时间:2017-10-16 12:34:00

标签: python numpy machine-learning neural-network backpropagation




  • *运算符doesn't perform matrix multiplication,您必须使用numpy.dot。例如,这会影响这些行:network_input * self.layer1[neuron]self.layer1_activations[weight]*self.layer2[neuron][weight]等。

  • 好像你是通过分类来解决你的问题(选择21个班级中的1个),但使用L2丢失。这有点混乱。您有两种选择:要么坚持分类并使用cross entropy loss function,要么使用L2丢失执行回归(即预测数值)。

  • 你绝对应该提取sigmoid函数以避免再次编写相同的表达式:

    def sigmoid(z):
      return 1 / (1 + np.exp(-z))
    def sigmoid_derivative(x):
      return sigmoid(x) * (1 - sigmoid(x))
  • 您执行self.layer1self.layer2的相同更新,这显然是错误的。花一些时间分析how exactly backpropagation作品。

神经网络与深度学习的第1章和第2章,作者Michael Nielsen,http://neuralnetworksanddeeplearning.com/chap1.html免费提供。本书对神经网络的工作原理给出了详尽的解释,包括执行后数学的细分。

来自开始的反向传播,由ErikHallström,由Maxim联系。 https://medium.com/@erikhallstrm/backpropagation-from-the-beginning-77356edf427d 。不像上面的指南那么彻底,但我同时保持开放,因为本指南更重要的是关于什么是重要的以及如何应用在尼尔森书中详细解释的数学公式。 / p>

如何用9行Python代码构建一个简单的神经网络 https://medium.com/technology-invention-and-more/how-to-build-a-simple-neural-network-in-9-lines-of-python-code-cc8f23647ca1 。对一些神经网络基础知识的有用和快速的介绍。


import random
import numpy as np
import scipy
import math
class Network(object):

    def __init__(self,inputLayerSize,hiddenLayerSize,outputLayerSize):

        #Layers represented both by their weights array and activation and inputsums vectors.
        self.layer1 = np.random.randn(hiddenLayerSize,inputLayerSize)
        self.layer2 = np.random.randn(outputLayerSize,hiddenLayerSize)

        self.layer1_activations = np.zeros((hiddenLayerSize, 1))
        self.layer2_activations = np.zeros((outputLayerSize, 1))

        self.layer1_inputsums = np.zeros((hiddenLayerSize, 1))
        self.layer2_inputsums = np.zeros((outputLayerSize, 1))

        self.layer1_errorsignals = np.zeros((hiddenLayerSize, 1))
        self.layer2_errorsignals = np.zeros((outputLayerSize, 1))

        self.layer1_deltaw = np.zeros((hiddenLayerSize, inputLayerSize))
        self.layer2_deltaw = np.zeros((outputLayerSize, hiddenLayerSize))

        self.outputLayerSize = outputLayerSize
        self.inputLayerSize = inputLayerSize
        self.hiddenLayerSize = hiddenLayerSize
        # self.weights = [np.random.randn(y,x)
        #                 for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, network_input):
        #Calculate inputsum and and activations for each neuron in the first layer
        for neuron in range(self.hiddenLayerSize):
            self.layer1_inputsums[neuron] = network_input * self.layer1[neuron]
            self.layer1_activations[neuron] = self.sigmoid(self.layer1_inputsums[neuron])

        # Calculate inputsum and and activations for each neuron in the second layer. Notice that each neuron in the second layer represented by
        # weights vector, consisting of all weights leading out of the kth neuron in (l-1) layer to the jth neuron in layer l.
        self.layer2_inputsums = np.zeros((self.outputLayerSize, 1))
        for neuron in range(self.outputLayerSize):
            for weight in range(self.hiddenLayerSize):
                self.layer2_inputsums[neuron] += self.layer1_activations[weight]*self.layer2[neuron][weight]
            self.layer2_activations[neuron] = self.sigmoid(self.layer2_inputsums[neuron])

        return self.layer2_activations

    def interpreted_output(self, network_input):
        #convert layer 2 activation numbers to a single output. The neuron (weight vector) with highest activation will be output.
        outputs = [x / 10 for x in range(-int((self.outputLayerSize/2)), int((self.outputLayerSize/2))+1, 1)] #range(-10, 11, 1)

    # def build_expected_output(self, training_data):
    #     #Views expected output number y for each x to generate an expected output vector from the network
    #     index=0
    #     for pair in training_data:
    #         expected_output_vector = np.zeros((self.outputLayerSize,1))
    #         x = training_data[0]
    #         y = training_data[1]
    #         for i in range(-int((self.outputLayerSize / 2)), int((self.outputLayerSize / 2)) + 1, 1):
    #             if y == i / 10:
    #                 expected_output_vector[i] = 1
    #                 #expect the target category to be a 1.
    #                 break
    #         training_data[index][1] = expected_output_vector
    #         index+=1
    #     return training_data

    def train(self, training_data, learn_rate):
        self.backpropagate(training_data, learn_rate)

    def backpropagate(self, train_data, learn_rate):
        #Perform for each x,y pair.
        for datapair in range(len(train_data)):
            x = train_data[datapair][0]
            y = train_data[datapair][1]
           # print("l2a " + str(self.layer2_activations))
           # print("l1a " + str(self.layer1_activations))
           # print("l2 " + str(self.layer2))
           # print("l1 " + str(self.layer1))
            for neuron in range(self.outputLayerSize):
                #Calculate first error equation for error signals of output layer neurons
                self.layer2_errorsignals[neuron] = (self.layer2_activations[neuron] - y[neuron]) * self.sigmoid_prime(self.layer2_inputsums[neuron])

            #Use recursive formula to calculate error signals of hidden layer neurons
            self.layer1_errorsignals = np.multiply(np.array(np.matrix(self.layer2.T) * np.matrix(self.layer2_errorsignals)) , self.sigmoid_prime(self.layer1_inputsums))
            # for neuron in range(self.hiddenLayerSize):
            #     #Use recursive formula to calculate error signals of hidden layer neurons
            #     self.layer1_errorsignals[neuron] = np.multiply(self.layer2[neuron].T,self.layer2_errorsignals[neuron]) * self.sigmoid_prime(self.layer1_inputsums[neuron])

            #Partial derivative of C with respect to weight for connection from kth neuron in (l-1)th layer to jth neuron in lth layer is
            #(jth error signal in lth layer) * (kth activation in (l-1)th layer.)
            #Update all weights for network at each iteration of a training pair.

            #Update weights in second layer
            for neuron in range(self.outputLayerSize):
                for weight in range(self.hiddenLayerSize):
                    self.layer2_deltaw[neuron][weight] = self.layer2_errorsignals[neuron]*self.layer1_activations[weight]*(-learn_rate)

            self.layer2 += self.layer2_deltaw

            #Update weights in first layer
            for neuron in range(self.hiddenLayerSize):
                self.layer1_deltaw[neuron] = self.layer1_errorsignals[neuron]*(x)*(-learn_rate)

            self.layer1 += self.layer1_deltaw
            #Comment/Uncomment to enable error evaluation.
            #print("Epoch {0}: Error: {1}".format(datapair, self.evaluate(test_data)))
            # print("l2a " + str(self.layer2_activations))
            # print("l1a " + str(self.layer1_activations))
            # print("l1 " + str(self.layer1))
            # print("l2 " + str(self.layer2))

    def evaluate(self, test_data):
        error = 0
        for x, y in test_data:
            #x is integer, y is single element np.array
            output = self.feedforward(x)
            error += y - output
        return error

#eval function for sin(x)
    # def evaluate(self, test_data):
    #     """
    #     Returns number of test inputs which network evaluates correctly.
    #     The ouput assumed to be neuron in output layer with highest activation
    #     :param test_data: test data set identical in form to train data set.
    #     :return: integer sum
    #     """
    #     correct = 0
    #     for x, y in test_data:
    #         outputs = [x / 10 for x in range(-int((self.outputLayerSize / 2)), int((self.outputLayerSize / 2)) + 1,
    #                                          1)]  # range(-10, 11, 1)
    #         newy = outputs[np.argmax(y)]
    #         output = self.interpreted_output(x)
    #         #print("output: " + str(output))
    #         if output == newy:
    #             correct+=1
    #     return(correct)

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def sigmoid_prime(self, z):
        return (1 - self.sigmoid(z)) * self.sigmoid(z)

def build_simple_data(data_points):
    x_vals = []
    y_vals = []
    for each in range(data_points):
        x = random.randint(-3,3)
        expected_output_vector = np.zeros((1, 1))
        if x > 0:
            expected_output_vector[[0]] = 1
            expected_output_vector[[0]] = 0

    return (list(zip(x_vals,y_vals)))

simpleNet = Network(1, 3, 1)
# print("Pretest")
# print(simpleNet.feedforward(-3))
# print(simpleNet.feedforward(10))
# init_weights_l1 = simpleNet.layer1
# init_weights_l2 = simpleNet.layer2
# simpleNet.train(build_simple_data(10000),.1)
# #sometimes Error converges to 0, sometimes error converges to 10.
# print("Initial Weights:")
# print(init_weights_l1)
# print(init_weights_l2)
# print("Final Weights")
# print(simpleNet.layer1)
# print(simpleNet.layer2)
# print("Post-test")
# print(simpleNet.feedforward(-3))
# print(simpleNet.feedforward(10))

def test_network(iterations,net,training_points):
    Casually evaluates pre and post test
    :param iterations: number of trials to be run
    :param net: name of network to evaluate.
    ;param training_points: size of training data to be used
    :return: four 1x1 arrays.
    pretest_negative = 0
    pretest_positive = 0
    posttest_negative = 0
    posttest_positive = 0
    for each in range(iterations):
        pretest_negative += net.feedforward(-10)
        pretest_positive += net.feedforward(10)
    for each in range(iterations):
        posttest_negative += net.feedforward(-10)
        posttest_positive += net.feedforward(10)
    return(pretest_negative/iterations, pretest_positive/iterations, posttest_negative/iterations, posttest_positive/iterations)

print(test_network(10000, simpleNet, 10000))


 #second layer's output activations use layer1's activations as input:
    for neuron in range(self.outputLayerSize):
        for weight in range(self.hiddenLayerSize):
            self.layer2_activations[neuron] += self.layer1_activations[weight]*self.layer2[neuron][weight]
        self.layer2_activations[neuron] = 1/(1+np.exp(self.layer2_activations[neuron]))


self.layer2_activations[neuron] += self.layer1_activations[weight]*self.layer2[neuron][weight]


self.layer2_inputsums[neuron] += self.layer1_activations[weight]*self.layer2[neuron][weight]

在更新的代码中。该线执行每个权重向量和每个输入向量之间的点积(来自层1的激活)以到达神经元的input_sum,通常称为z(思考sigmoid(z))。在我的网络中,sigmoid函数的导数sigmoid_prime用于计算成本函数相对于所有权重的梯度。通过在实际输出和预期输出之间乘以sigmoid_prime(z)*网络误差。如果z非常大(且为正),则神经元的激活值将非常接近1.这意味着网络确信该神经元应该激活。如果z非常负,则同样如此。因此,网络不希望从根本上调整它满意的权重,因此神经元的每个权重的变化规模由sigmoid(z),sigmoid_prime(z)的梯度给出。非常大的z意味着非常小的梯度和非常小的变化应用于权重(当z = 0时,sigmoid的梯度最大化,当网络不知道应该如何分类神经元以及当神经元的激活为0.5时)。 / p>


self.layer2_inputsums = np.zeros((self.outputLayerSize, 1))
