我编写了一个python程序,其中我试图实现一个3层神经网络来处理MNIST图形识别问题,但是我从每个时代得到的错误并没有下降。有人可以帮我弄清楚这是我的代码问题还是我的模型问题? -------更新--- 我打印了输出f3的数组,所有值在一个循环后减少。我真的很困惑。
这是代码
import struct
import numpy as np
import os
import matplotlib.pyplot as plt
np.random.seed(0)
## compute sigmoid nonlinearity
def sigmoid(x):
output = 1/(1+np.exp(-x))
return output
# convert output of sigmoid function to its derivative
def sigm_deri(output):
return output*(1-output)
def softmax(x):
e_x = np.exp(x - np.max(x))
return e_x / e_x.sum()
#exp_scores=np.exp(x)
#probs=exp_scores/np.sum(exp_scores, axis=1,keepdims=True)
#return probs
def softmax_deri(signal):
J = - signal[..., None] * signal[:, None, :] # off-diagonal Jacobian
iy, ix = np.diag_indices_from(J[0])
J[:, iy, ix] = signal * (1. - signal) # diagonal
return J.sum(axis=1) # sum across-rows for each sample
def relu(x):
return np.maximum(x, 0)
def relu_deri(output):
return 1.*(output>0)
alpha = 0.03
lamda = 0.1#alpha*alpha
input_dim = 28*28
hidden_dim1 = 500
hidden_dim2 = 100
output_dim = 10
# initialize neural network weights
synapse_0 = (2*np.random.random((input_dim,hidden_dim1))-1)
synapse_1 = (2*np.random.random((hidden_dim1,hidden_dim2))-1)
synapse_2 = (2*np.random.random((hidden_dim2,output_dim)) -1)
bias_0=np.zeros((1, hidden_dim1))
bias_1=np.zeros((1, hidden_dim2))
bias_2=np.zeros((1, output_dim))
imagef = open('/home/rdeng/code/mine/nn/data/train-images-idx3-ubyte', 'rb')
labelf = open('/home/rdeng/code/mine/nn/data/train-labels-idx1-ubyte', 'rb')
magic, imgNum=struct.unpack(">II", imagef.read(8))
imgRow, imgCol =struct.unpack(">II", imagef.read(8))
print magic, imgNum, imgRow, imgCol
lblMagic, lblNum=struct.unpack(">II", labelf.read(8))
print lblMagic, lblNum
overallError = 0
loop=100
errordot=np.zeros((loop,1))
X=np.fromfile(imagef, np.uint8, imgRow*imgCol)
y=np.fromfile(labelf, np.uint8, 1)
for j in range(loop):
#read a 28x28 image and a byte label
#X=np.fromfile(imagef, np.uint8, imgRow*imgCol)
#y=np.fromfile(labelf, np.uint8, 1)
X=X.reshape(1, imgRow*imgCol)
#Forward propagation
z1=np.dot(X, synapse_0)
f1=sigmoid(z1+bias_0)
z2=np.dot(f1, synapse_1)
f2=sigmoid(z2+bias_1)
z3=np.dot(f2, synapse_2)
f3=sigmoid(z3+bias_2)
sumf3=np.sum(np.abs(f3))
f4=softmax(f3)
pred=np.argmax(f4)
yy=np.zeros((1, output_dim))
yy[0, y]=sumf3
#backward propagation
#ignore softmax layer
error=pow(f3-yy,2)/2
errordot[j]=np.sum(error)
if j%(loop/10)==0:
print 'f3=',f3
print 'pred=',pred,'y=',y,'error=',error
gprime=sigm_deri(f3)
delta3=error#*gprime
#print 'delta3=',delta3.shape,'synapse_2.T=',synapse_2.T.shape
delta2=np.dot(delta3, synapse_2.T)*sigm_deri(f2)
#print 'delta2=',delta2.shape,'synapse_1.T=',synapse_1.T.shape
delta1=np.dot(delta2, synapse_1.T)*sigm_deri(f1)
d2=np.dot(delta3.T, f2)
dbias_2=delta3
d1=np.dot(delta2.T, f1)
dbias_1=delta2
d0=np.dot(delta1.T, X)
dbias_0=delta1
synapse_0-=alpha*(d0.T+lamda*synapse_0)/hidden_dim1
synapse_1-=alpha*(d1.T+lamda*synapse_1)/hidden_dim2
synapse_2-=alpha*(d2.T+lamda*synapse_2)/output_dim
bias_0-=alpha*dbias_0/hidden_dim1
bias_1-=alpha*dbias_1/hidden_dim2
bias_2-=alpha*dbias_2/output_dim
print 'overallerror=',overallError
plt.plot(range(loop), errordot, "o")
plt.show()
我更新了代码以循环训练一个图像,但错误似乎不正确,因为它正在增加。
此外,f3
的每个数据都越来越小,但正确的数据也会越来越小,导致错误发生变化。