After reading through Iamtrask's guide on programming a simple NN in Python, I tried to rewrite it as a simple class, so that I could choose the number and size of the layers and apply it to other problems more easily.
After some fiddling around, I got it to a point where it did great on the example from that tutorial, as well as on other simple things like binary <-> Gray code conversion, so I figured I'd take it up a notch and have a go at the MNIST handwritten digits dataset. Easy enough, right?
Unfortunately, that's where I got stuck. After the first few generations, the output layer always ends up close to
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
or thereabouts. The values never quite all reach zero, but the end result is that the network guesses the same digit no matter the input, because one of the output nodes is always *just* slightly farther from zero than the others. I tried adding more nodes to the two hidden layers until Python told me it had had enough, and I tried running it with only one hidden layer instead, to no better results.
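To illustrate (the numbers here are made up, but this is the pattern):

import numpy as np

# every input produces practically the same near-zero output vector...
output_layer = np.array([0.004, 0.002, 0.011, 0.003, 0.001, 0.005, 0.002, 0.003, 0.004, 0.002])
# ...so the argmax picks whichever node sits slightly above the rest - here digit 2:
print(np.argmax(output_layer))  # -> 2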
At first I figured I must have misunderstood something fundamental about backpropagation, but then why does my NN do just fine on the simpler problems? What am I missing here, and how do I fix it to get useful results?
Here's the code for my neural network class (72 lines):
import numpy as np


class neuralNetwork():
    def __init__(self, layer_node_counts):
        self.synapses = self.init_synapses(layer_node_counts)

    def init_synapses(self, layer_node_counts):
        last_layer_node_count = layer_node_counts[0]
        synapses = []
        for current_layer_node_count in layer_node_counts[1:]:
            synapses.append(2 * np.random.random((last_layer_node_count, current_layer_node_count)) - 1)
            last_layer_node_count = current_layer_node_count
        return synapses

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_output_to_derivative(self, x):
        # kind of a bell curve!
        return x * (1 - x)
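        # (this assumes x is already a sigmoid output: the derivative x * (1 - x)
        # peaks at 0.25 for x = 0.5 and vanishes as x approaches 0 or 1)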
    def feed_forward(self, input_):
        # forward propagation through all our synapses and layers, starting with the input array:
        layers = [np.array(input_)]
        for key, synapse in enumerate(self.synapses):
            new_layer = self.sigmoid(layers[key] @ synapse)
            layers.append(new_layer)
        return layers

    def classify(self, input_):
        resulting_layers = self.feed_forward(input_)
        # return output layer(s)
        return resulting_layers[-1]

    def train(self, input_, target_output):
        input_ = np.atleast_2d(input_)
        target_output = np.atleast_2d(target_output)
        layer_result_matrices = self.feed_forward(input_)
        synapse_adjustments_total = [0] * len(self.synapses)
        # how much this layer was off the mark
        output_error = target_output - layer_result_matrices[-1]
        # how much we're letting it matter (bell curve height - depends on "confidence" of the synapse connection)
        output_delta = output_error * self.sigmoid_output_to_derivative(layer_result_matrices[-1])
        layer_deltas = [output_delta]
        for index in reversed(range(1, len(self.synapses))):
            layer_error = layer_deltas[0] @ self.synapses[index].T
            layer_delta = layer_error * self.sigmoid_output_to_derivative(layer_result_matrices[index])
            layer_deltas.insert(0, layer_delta)
        for index in range(len(self.synapses)):
            synapse_adjustments_total[index] += layer_result_matrices[index].T @ layer_deltas[index]
        for index, adjustment in enumerate(synapse_adjustments_total):
            self.synapses[index] += adjustment
        return self.synapses

    def calculate_mean_error(self, input_, target_output):
        current_output = self.classify(input_)
        error_matrix = np.abs(target_output - current_output) / len(target_output)
        mean_error = np.mean(np.abs(error_matrix))
        return mean_error
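For comparison, this is the kind of simpler problem where the same class converges without trouble. A minimal sketch (XOR with a single hidden layer; nnn is the same module import as in my training code below, and the iteration count is a rough guess):

import numpy as np
import nekkowe_neural_network as nnn

np.random.seed(1)
xor_inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
xor_targets = np.array([[0], [1], [1], [0]])

xor_network = nnn.neuralNetwork([2, 4, 1])  # 2 inputs, 4 hidden nodes, 1 output
for _ in range(10000):
    xor_network.train(xor_inputs, xor_targets)

# the outputs should end up close to [[0], [1], [1], [0]]:
print(xor_network.classify(xor_inputs))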
...and my training code (64 lines):
# -*- coding: utf-8 -*-
import numpy as np
import nekkowe_neural_network as nnn
from mnist import MNIST


def normalize_input(images):
    return np.array(images) / (255 * 0.99 + 0.01)


def get_one_hot_by_label(label):
    return [0.99 if i == label else 0.01 for i in range(10)]
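    # e.g. get_one_hot_by_label(3) -> [0.01, 0.01, 0.01, 0.99, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01]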
def get_label_by_one_hot(layer):
    return np.argmax(layer)


def test_accuracy(neural_network, test_images, target_labels):
    guesses = 0
    correct_guesses = 0
    normalized_input = normalize_input(test_images)
    output_layers = neural_network.classify(normalized_input)
    for i, output_layer in enumerate(output_layers):
        predicted_label = get_label_by_one_hot(output_layer)
        target_label = target_labels[i]
        guesses += 1
        correct_guesses += 1 if predicted_label == target_label else 0
    print(str(correct_guesses) + "/" + str(guesses) + " correct")


BATCH_SIZE = 64
MAX_ITERATIONS = 1000

np.random.seed(1)
neural_network = nnn.neuralNetwork([28**2, 28**2, 28**2, 10])
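# (784 input nodes for the 28x28 pixels, two hidden layers of 784 nodes each, 10 output nodes - one per digit)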
mndata = MNIST('MNIST')
#training_data_images, training_data_labels = mndata.load_training()
#training_data_one_hot = [get_one_hot_by_label(label) for label in training_data_labels]
testing_data_images, testing_data_labels = mndata.load_testing()
training_data = mndata.load_training_in_batches(BATCH_SIZE)

for i, batch in enumerate(training_data):
    training_data_images = np.array(batch[0])
    training_data_labels = np.array(batch[1])
    training_data_one_hot = np.array([get_one_hot_by_label(label) for label in training_data_labels])
    if i > 0:
        neural_network.train(training_data_images, training_data_one_hot)
    # Report progress at 0, 1, 10, 100, 200, 300, 400 etc. as well as the final one:
    if i % 100 == 0 or np.log10(i) % 1 == 0 or i == MAX_ITERATIONS:
        print("Batch " + str(i) + ":")
        test_accuracy(neural_network, testing_data_images, testing_data_labels)
    if i == MAX_ITERATIONS:
        print("Reached iteration limit!")
        break

print("All done!")