How does this subtraction work in Python?

Posted: 2016-12-06 21:03:32

Tags: python arrays numpy machine-learning

My question only concerns a small part of this code, but I'm posting all of it in case it helps. It comes from http://neuralnetworksanddeeplearning.com/chap1.html#implementing_our_network_to_classify_digits (scroll down from there to see the explanation of the code).

import random
import numpy as np

class Network(object):

    def __init__(self, sizes):
        """The list ``sizes`` contains the number of neurons in the
        respective layers of the network.  For example, if the list
        was [2, 3, 1] then it would be a three-layer network, with the
        first layer containing 2 neurons, the second layer 3 neurons,
        and the third layer 1 neuron.  The biases and weights for the
        network are initialized randomly, using a Gaussian
        distribution with mean 0, and variance 1.  Note that the first
        layer is assumed to be an input layer, and by convention we
        won't set any biases for those neurons, since biases are only
        ever used in computing the outputs from later layers."""
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Return the output of the network if ``a`` is input."""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a)+b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            test_data=None):
        """Train the neural network using mini-batch stochastic
        gradient descent.  The ``training_data`` is a list of tuples
        ``(x, y)`` representing the training inputs and the desired
        outputs.  The other non-optional parameters are
        self-explanatory.  If ``test_data`` is provided then the
        network will be evaluated against the test data after each
        epoch, and partial progress printed out.  This is useful for
        tracking progress, but slows things down substantially."""
        if test_data: n_test = len(test_data)
        n = len(training_data)
        for j in xrange(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in xrange(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print "Epoch {0}: {1} / {2}".format(
                    j, self.evaluate(test_data), n_test)
            else:
                print "Epoch {0} complete".format(j)

    def update_mini_batch(self, mini_batch, eta):
        """Update the network's weights and biases by applying
        gradient descent using backpropagation to a single mini batch.
        The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
        is the learning rate."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.weights = [w-(eta/len(mini_batch))*nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient for the cost function C_x.  ``nabla_b`` and
        ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
        to ``self.biases`` and ``self.weights``."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # feedforward
        activation = x
        activations = [x] # list to store all the activations, layer by layer
        zs = [] # list to store all the z vectors, layer by layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation)+b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # backward pass
        delta = self.cost_derivative(activations[-1], y) * \
            sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # Note that the variable l in the loop below is used a little
        # differently to the notation in Chapter 2 of the book.  Here,
        # l = 1 means the last layer of neurons, l = 2 is the
        # second-last layer, and so on.  It's a renumbering of the
        # scheme in the book, used here to take advantage of the fact
        # that Python can use negative indices in lists.
        for l in xrange(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (nabla_b, nabla_w)

    def evaluate(self, test_data):
        """Return the number of test inputs for which the neural
        network outputs the correct result. Note that the neural
        network's output is assumed to be the index of whichever
        neuron in the final layer has the highest activation."""
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_activations, y):
        """Return the vector of partial derivatives \partial C_x /
        \partial a for the output activations."""
        return (output_activations-y)

#### Miscellaneous functions
def sigmoid(z):
    """The sigmoid function."""
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))

Ignore most of the code unless you need to refer back to it to understand the data structures. My question is about the line in the middle of the backprop method that calls self.cost_derivative(activations[-1], y): from what I can tell, both values being passed in are arrays (I can see this when printing them, and it is also what the author explains). All the cost_derivative method does is subtract the two values, but they are arrays, so how does that work?

I can understand getting an error when I do this in Python:

a = [1,2,4]
b = [5,6,7]
print(a-b)

I believe this may be because they are numpy arrays?

Something similar also happens in the sigmoid and sigmoid_prime functions, where z is an array (see the places where these functions are called with an array as the argument), even though the function treats it as a single value... how does that work? I assume it just operates on each value in the array?

Basically, I keep seeing functions that I would expect to work only on single values being used on whole arrays.

Cheers for any explanation; the link I posted has more detail.

3 answers:

Answer 0 (score: 2)

When you subtract two lists:

a = [1,2,4]
b = [5,6,7]
print(a-b)

Python looks for a __sub__ method to carry out the subtraction. Vanilla Python's list type does not define __sub__ (there is no built-in way to subtract two lists), so the operation raises a TypeError.

When you subtract a numpy array from a list:

a = [1,2,4]
b = numpy.array([5,6,7])
print(a-b)

The list's own subtraction still fails, but Python then checks whether the other operand provides a suitable (reflected) subtraction method, and finds numpy's. Numpy wraps the other operand with np.asarray() and subtracts as if both were numpy arrays. Since the list maps to a one-dimensional array of the same size, the subtraction works, and you end up with an array as the output.
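A minimal sketch of that lookup, assuming numpy is installed (the variable names are just for illustration):

import numpy as np

a = [1, 2, 4]
b = [5, 6, 7]

# list - list: neither operand defines subtraction, so Python raises TypeError
try:
    print(a - b)
except TypeError as e:
    print(e)  # unsupported operand type(s) for -: 'list' and 'list'

# list - ndarray: numpy's reflected subtraction wraps the list with
# np.asarray() and subtracts element by element
print(a - np.array(b))  # [-4 -4 -3]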

Answer 1 (score: 1)

You are correct that the subtraction works because output_activations and y are numpy arrays. Numpy is a Python library for fast matrix operations, and numpy arrays overload the - operator to perform element-wise matrix subtraction.

However, in your example, [1,2,4] is just a plain Python list, not a numpy array, and the - operator is not defined for lists. If you replace the lists with numpy arrays, the output makes more sense:

import numpy as np

a = np.array([1,2,4])
b = np.array([5,6,7])
print(a-b)
# [-4 -4 -3]
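The same mechanism covers the sigmoid and sigmoid_prime part of the question: np.exp is a numpy universal function that is applied element-wise, so calling sigmoid on an array returns an array of the same shape. A minimal sketch, reusing the sigmoid definition from the question:

import numpy as np

def sigmoid(z):
    # applied to each element when z is a numpy array
    return 1.0/(1.0+np.exp(-z))

z = np.array([-1.0, 0.0, 1.0])
print(sigmoid(z))  # approximately [0.2689 0.5 0.7311]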

Answer 2 (score: 0)

Yes, you get the error because a and b in your example are plain Python lists rather than numpy arrays (or scalars). Use np.subtract(a, b) instead, which converts them to arrays; keep in mind that you are subtracting vectors.
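A minimal sketch of that suggestion (np.subtract accepts plain lists and converts them to arrays before subtracting element-wise):

import numpy as np

a = [1, 2, 4]
b = [5, 6, 7]

# np.subtract coerces both lists to arrays, then subtracts element-wise
print(np.subtract(a, b))  # [-4 -4 -3]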