我正在建立一个神经网络来识别手写符号。在训练网络之前,我从来没有建过一个。所有预测都收敛到50%。
当我运行代码时,错误率会降低,直到开始收敛到0.5左右为止,然后对所有21个符号的预测都等于0.5。
我的神经网络非常简单:
import numpy as np
import glob
import csv
np.random.seed(0)
# Pixel values for 'C', represents 20x20 image of a 'C'
input1 = np.array([[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,00,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,00,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,00,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,00,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,00,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,00,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,00,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,00,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,00,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,00,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,00,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,00,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]])
# 21 possible outputs, a 1 in labels[9] represents a 'C'
labels = np.array([[0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0]])
# 400 inputs, 1 for each pixel (20x20 images)
w1 = 2*np.random.random((400,21)) - 1
# 21 nodes in hidden layer
w2 = 2*np.random.random((21,21)) - 1
# 21 output nodes (I have to differentiate between 21 symbols)
w3 = 2*np.random.random((21,21)) - 1
learning_rate = 0.01
l1_bias = np.random.rand(1)
l2_bias = np.random.rand(1)
def sigmoid(x):
x = np.clip( x, -500, 500 )
return 1/(1+np.exp(-x))
def sigmoid_der(x):
return sigmoid(x)*(1-sigmoid(x))
# Learning part
for x in range(1000):
# Feed-forward part
input = input1
# Multiply input by weights
l1 = np.dot(input, w1)
# Add bias
l1 += l1_bias
# Squish with sigmoid
l1 = sigmoid(l1)
# Repeat for other layers
l2 = np.dot(l1, w2)
l2 += l2_bias
l2 = sigmoid(l2)
l3 = np.dot(l2, w3)
l3 = sigmoid(l3)
# l3 is output layer
# Backward propagation
# Calculate error
error = 1 / 2 * ((labels - l3.T) ** 2)
# Calculate adjustment for weights for each layer using error and sigmoid_der
w3_adjustment = np.dot(l2.T, (error * sigmoid_der(l3.T)).T)
w2_adjustment = np.dot(l1.T, (error * sigmoid_der(l2.T)).T)
w1_adjustment = np.dot(input.T, (error * sigmoid_der(l1.T)).T)
# Adjust weights
w3 = w3 - w3_adjustment * learning_rate
w2 = w2 - w2_adjustment * learning_rate
w1 = w1 - w1_adjustment * learning_rate
有人知道我是否犯了一些菜鸟错误,这可能导致我的输出收敛?谢谢!