I followed an article from TowardsDataScience here.
I wrote out the math equations for the network, and everything made sense.
But after writing the code, the results are strange, as if it always predicts the same class...
I have spent a lot of time on this and tried many things, but I still don't understand what I am doing wrong.
Here is the code:
# coding: utf-8
from mnist import MNIST
import numpy as np
import math
import os
import pdb
DATASETS_PREFIX = '../Datasets/MNIST'
mndata = MNIST(DATASETS_PREFIX)
TRAINING_IMAGES, TRAINING_LABELS = mndata.load_training()
TESTING_IMAGES , TESTING_LABELS = mndata.load_testing()
### UTILS
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def d_sigmoid(x):
    return x.T * (1 - x)
    #return np.dot(x.T, 1.0 - x)

def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

def d_softmax(x):
    #This function has not yet been tested.
    return x.T * (1 - x)

def tanh(x):
    return np.tanh(x)

def d_tanh(x):
    return 1 - x.T * x

def normalize(image):
    return image / (255.0 * 0.99 + 0.01)
### !UTILS
class NeuralNetwork(object):
    """
    This is a 3-layer neural network (1 hidden layer).
    @_input   : input layer
    @_weights1: weights between input layer and hidden layer (matrix shape (xshape, self._neurones_nb))
    @_weights2: weights between hidden layer and output layer (matrix shape (self._neurones_nb, yshape))
    @_y       : expected output (one-hot encoded label)
    @_output  : computed output
    @_alpha1/2: learning rates
    """
    def __init__(self, xshape, yshape):
        self._neurones_nb = 20
        self._input = None
        self._weights1 = np.random.randn(xshape, self._neurones_nb)
        self._weights2 = np.random.randn(self._neurones_nb, yshape)
        self._y = np.mat(np.zeros(yshape))
        self._output = np.mat(np.zeros(yshape))
        self._alpha1 = 0.1
        self._alpha2 = 0.1
        self._function = sigmoid
        self._derivative = d_sigmoid
        self._epoch = 1
    def Train(self, xs, ys):
        for j in range(self._epoch):
            for i in range(len(xs)):
                self._input = normalize(np.mat(xs[i]))
                self._y[0, ys[i]] = 1
                self.feedforward()
                self.backpropagation()
                self._y[0, ys[i]] = 0

    def Predict(self, image):
        self._input = normalize(image)
        out = self.feedforward()
        return out

    def feedforward(self):
        self._layer1 = self._function(np.dot(self._input, self._weights1))
        self._output = self._function(np.dot(self._layer1, self._weights2))
        return self._output

    def backpropagation(self):
        d_weights2 = np.dot(
            self._layer1.T,
            2 * (self._y - self._output) * self._derivative(self._output)
        )
        d_weights1 = np.dot(
            self._input.T,
            np.dot(
                2 * (self._y - self._output) * self._derivative(self._output),
                self._weights2.T
            ) * self._derivative(self._layer1)
        )
        self._weights1 += self._alpha1 * d_weights1
        self._weights2 += self._alpha2 * d_weights2
if __name__ == '__main__':
    neural_network = NeuralNetwork(len(TRAINING_IMAGES[0]), 10)
    print('* training neural network')
    neural_network.Train(TRAINING_IMAGES, TRAINING_LABELS)
    print('* testing neural network')
    count = 0
    for i in range(len(TESTING_IMAGES)):
        image = np.mat(TESTING_IMAGES[i])
        expected = TESTING_LABELS[i]
        prediction = neural_network.Predict(image)
        # count a hit when the highest-scoring output neuron matches the label
        if np.argmax(prediction) == expected:
            count += 1
        if i % 100 == 0: print(expected, prediction)
    print(f'* results: {count} / {len(TESTING_IMAGES)}')
Thank you very much for your help.
Julien
Answer 0 (score: 0)
Well, I don't see any error in the implementation, so considering your network, it could be improved by doing two things:

One epoch is not enough. Not at all! You need to pass over the data multiple times (10 passes is a bare minimum; on average about 100 epochs are needed, and that number can go as high as 5000).

Your network is shallow, i.e. really simple. To detect difficult things (like images), you could implement a CNN (convolutional neural network), or first try to deepen the current network and make it more complex.

=> Try adding layers (3, 4, 5, etc.), then add neurons to each layer (50, 60, ...) depending on your input size; you can go as high as 800, 900, or more. A sketch of both suggestions follows below.
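A minimal sketch of both suggestions, reusing the NeuralNetwork class above. The epoch count (100), the layer widths (60 and 50), and the helper _d_sigmoid are illustrative assumptions, not tuned or canonical values; the sigmoid derivative is applied elementwise via np.multiply here:

# Sketch: more epochs plus one extra hidden layer on top of the class above.
class DeeperNeuralNetwork(NeuralNetwork):
    def __init__(self, xshape, yshape, h1=60, h2=50):
        super().__init__(xshape, yshape)
        self._epoch = 100                             # many passes over the data, not just one
        self._weights1 = np.random.randn(xshape, h1)  # input -> hidden 1
        self._weights2 = np.random.randn(h1, h2)      # hidden 1 -> hidden 2
        self._weights3 = np.random.randn(h2, yshape)  # hidden 2 -> output

    @staticmethod
    def _d_sigmoid(x):
        # elementwise sigmoid derivative s * (1 - s), evaluated on an activation
        return np.multiply(x, 1 - x)

    def feedforward(self):
        self._layer1 = self._function(np.dot(self._input, self._weights1))
        self._layer2 = self._function(np.dot(self._layer1, self._weights2))
        self._output = self._function(np.dot(self._layer2, self._weights3))
        return self._output

    def backpropagation(self):
        # same update scheme as the 1-hidden-layer version, extended by one layer
        delta3 = np.multiply(2 * (self._y - self._output), self._d_sigmoid(self._output))
        d_weights3 = np.dot(self._layer2.T, delta3)
        delta2 = np.multiply(np.dot(delta3, self._weights3.T), self._d_sigmoid(self._layer2))
        d_weights2 = np.dot(self._layer1.T, delta2)
        delta1 = np.multiply(np.dot(delta2, self._weights2.T), self._d_sigmoid(self._layer1))
        d_weights1 = np.dot(self._input.T, delta1)
        self._weights1 += self._alpha1 * d_weights1
        self._weights2 += self._alpha2 * d_weights2
        self._weights3 += self._alpha2 * d_weights3

Usage mirrors the main block above, e.g. neural_network = DeeperNeuralNetwork(len(TRAINING_IMAGES[0]), 10).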