你好,我把代码发出来请大家审查。我用 Python 实现自己的神经网络已经有几个星期了,但误分类率似乎始终无法降到 16%~17% 以下。我尝试过不同的学习率和不同的隐藏层神经元数量,仍然没有明显的改进。我很清楚我的实现只是最基本的传统神经网络,但根据我在互联网上看到的其他实现,我原本期待更好的结果。希望你们对此感兴趣:如果你能指出我的代码中可能存在的问题并给出新的思路,那就太好了;如果你认为这已经是传统实现所能达到的最好结果、我应该加入新的东西,也请告诉我。
无论如何,下面是我的代码。希望它足够易读——我尽量把它写得简单,因为我正是通过它来理解神经网络是如何工作的。
编辑:也许我的问题表述得不够清楚。如果你们感兴趣的话,我基本上是想请大家帮我找出当前实现中的一些细节,使误分类率能够降到 17% 以下,因为这显然是我的实现目前能达到的最好结果。我非常感谢任何建议或想法;我对这个主题非常感兴趣,但还是一个初学者,如果能有一些巧妙的思路帮助我改进实现,那就太好了。
文件:mnist_dataset.py - 提取 MNIST 数据
import numpy as np
from struct import unpack
# Open the four MNIST IDX files in binary mode.  The handles live at module
# level because getMNISTData() reads from them and closes them afterwards.
# Every path is relative to the current working directory.
train_input_file = open("dataset/train-images-idx3-ubyte", "rb")
# Fixed: this path used to start with "/" ("/dataset/..."), which pointed at
# the filesystem root instead of the local "dataset" directory used by the
# other three files.
train_output_file = open("dataset/train-labels-idx1-ubyte", "rb")
test_input_file = open("dataset/t10k-images-idx3-ubyte", "rb")
test_output_file = open("dataset/t10k-labels-idx1-ubyte", "rb")
def readData(f, labels=False, scale=1):
    """Read an MNIST IDX file into a 2-D float array.

    The header is a sequence of 4-byte big-endian unsigned integers: a magic
    number, the item count and, for image files only, the row and column
    counts.  The payload that follows is one unsigned byte per value.

    Args:
        f: File object opened in binary mode and positioned at the start of
            the IDX data.  It is handed to ``np.fromfile``, so it must be a
            real file, not an in-memory buffer.
        labels: True for a label file (no row/column fields in the header,
            one value per item); False for an image file.
        scale: Only the first ``int(num / scale)`` items are read.

    Returns:
        A float array of shape ``(int(num / scale), row * col)``; for label
        files the second dimension is 1.

    Raises:
        IndexError: if the header is truncated.
        ValueError: if the payload holds fewer values than the header says.
    """
    def read_uint32():
        # Reading with an explicit big-endian dtype decodes the field
        # directly, with no separate struct.unpack step.
        return int(np.fromfile(f, dtype='>u4', count=1)[0])

    # The magic number is not used, but it must still be consumed so the
    # file position advances to the item count.
    read_uint32()
    num = read_uint32()

    row = 1
    col = 1
    if not labels:
        row = read_uint32()
        col = read_uint32()

    wanted = int(num / scale)
    width = col * row
    # One bulk read instead of one np.fromfile call per item.
    raw = np.fromfile(f, dtype=np.ubyte, count=wanted * width)
    return raw.reshape(wanted, width).astype(float)
def getMNISTData():
    """Load the MNIST train/test sets from the module-level file handles.

    Pixel values are divided by 255.0; labels are returned as read.  The
    shape of each array is printed.  The four module-level files are closed
    before returning (also when reading fails), so this function can only be
    called once per import.

    Returns:
        Tuple ``(train_input, train_out, test_input, test_out)``.
    """
    handles = (train_input_file, train_output_file,
               test_input_file, test_output_file)
    try:
        train_input = readData(train_input_file, scale=1) / 255.0
        train_out = readData(train_output_file, True, scale=1)
        test_input = readData(test_input_file) / 255.0
        test_out = readData(test_output_file, True)
    finally:
        # Release the files even if one of the reads raised.
        for handle in handles:
            handle.close()

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Train input: " + str(train_input.shape))
    print("Train output: " + str(train_out.shape))
    print("Test input: " + str(test_input.shape))
    print("Test output: " + str(test_out.shape))
    return (train_input, train_out, test_input, test_out)
文件:NN.py - 神经网络实现
import mnist_dataset
import numpy as np
import random
import matplotlib.pyplot as plt
def encode_data_10(v):
    """Return the one-hot encoding of class index ``v`` as a 1x10 nested list.

    ``v`` is truncated with ``int()``; position ``int(v)`` is set to 1.0 and
    the other nine entries are 0.0.
    """
    one_hot = [0.0] * 10
    one_hot[int(v)] = 1.0
    # Wrapped in an outer list so the result has one row and ten columns.
    return [one_hot]
def encode_data_1(v):
    """Map a label linearly to a single target value: ``0.2 * v - 1.0``.

    Labels 0..10 therefore land in the interval [-1.0, 1.0].
    """
    scaled = 0.2 * v
    return scaled - 1.0
# Load MNIST: training inputs/labels followed by test inputs/labels.
x_train, y_train, x_test, y_test = mnist_dataset.getMNISTData()

# --- Hyper-parameters -------------------------------------------------------
num_hidden_neurons = 500   # width of the single hidden layer
num_output_neurons = 10    # 10 -> one-hot targets, 1 -> single scalar target
sample_size = 30           # rows drawn per training iteration
iter = 3000                # number of training iterations
learning_rate = 1.0        # initial step size

# --- Target encoding --------------------------------------------------------
# The labels are re-encoded to match the shape of the output layer.
if num_output_neurons <= 1:
    y_train = np.matrix(list(map(encode_data_1, y_train)))
    y_test = np.matrix(list(map(encode_data_1, y_test)))
else:
    # encode_data_10 returns a nested 1x10 list per label, so the results go
    # through np.array before being turned into a matrix.
    y_train = np.matrix(np.array(list(map(encode_data_10, y_train))))
    y_test = np.matrix(np.array(list(map(encode_data_10, y_test))))
def getSample(sample_size, x, y):
    """Draw a random mini-batch of matching rows from ``x`` and ``y``.

    Rows are chosen without replacement using the stdlib ``random`` module.

    Args:
        sample_size: Number of rows to draw; must not exceed ``len(y)``.
        x: 2-D array (or matrix) of inputs, one example per row.
        y: 2-D array (or matrix) of targets, row-aligned with ``x``.

    Returns:
        Tuple ``(x_r, y_r)`` of float ndarrays with shapes
        ``(sample_size, x.shape[1])`` and ``(sample_size, y.shape[1])``.

    Raises:
        ValueError: if ``sample_size`` is larger than the number of rows.
    """
    # Sample from range(len(y)) so that row 0 is eligible as well; the
    # previous xrange(1, len(y)) could never pick the first example.
    picked = np.array(random.sample(range(len(y)), sample_size), dtype=int)
    # Index all rows at once instead of copying them one by one.  asarray
    # also strips the np.matrix subclass, so plain ndarrays are returned.
    x_r = np.asarray(x, dtype=float)[picked]
    y_r = np.asarray(y, dtype=float)[picked]
    return (x_r, y_r)
# Draw the first mini-batch; the training loop draws a fresh one at the end
# of every iteration.
inputVector, targetVector = getSample(sample_size, x_train, y_train)

# Weight initialisation.
# The weights used to come from np.random.random, i.e. uniform in [0, 1).
# With every weight positive and several hundred inputs per unit, the
# pre-activations start large and positive, the sigmoid units start
# saturated near 1 and their gradients are close to zero.  Drawing from a
# small interval centred on zero, scaled by 1/sqrt(fan-in), keeps the initial
# pre-activations in the range where the sigmoid is responsive.
num_inputs = x_train.shape[1]

hidden_init_range = 1.0 / np.sqrt(num_inputs)
hiddenWeights = np.mat(np.random.uniform(
    -hidden_init_range, hidden_init_range, (num_hidden_neurons, num_inputs)))
print("W0 shape: " + str(hiddenWeights.shape))

output_init_range = 1.0 / np.sqrt(num_hidden_neurons)
outputWeights = np.mat(np.random.uniform(
    -output_init_range, output_init_range,
    (num_output_neurons, num_hidden_neurons)))
print("W1 shape: " + str(outputWeights.shape))
def act_func_l1(a):
    """Logistic sigmoid ``1 / (1 + exp(-a))``, applied element-wise."""
    denominator = np.exp(-a) + 1
    return 1.0 / denominator
def der_act_func_l1(a):
    """Derivative of ``act_func_l1`` evaluated at pre-activation ``a``.

    Computed as ``s * (1 - s)`` with ``s = act_func_l1(a)``.  The product is
    element-wise for ndarray input.
    """
    activation = act_func_l1(a)
    return activation * (1.0 - activation)
def feedforward(l0):
    """Run a forward pass through both layers using the current weights.

    Reads the module-level ``hiddenWeights`` and ``outputWeights``.

    Args:
        l0: Input rows, one example per row.

    Returns:
        Tuple ``(layer1, layer2)``: the hidden activations as an np.matrix
        and the output activations as an ndarray.
    """
    # Hidden layer: affine map followed by the sigmoid.  The sigmoid is
    # applied to a plain ndarray and the result is wrapped back into a matrix
    # so that the next ``*`` is a matrix product.
    hidden_pre = l0 * hiddenWeights.T
    hidden_out = np.matrix(act_func_l1(np.asarray(hidden_pre)))

    # Output layer.
    output_pre = hidden_out * outputWeights.T
    output_out = act_func_l1(np.asarray(output_pre))
    return (hidden_out, output_out)
def miss(x, y):
    """Count the rows of ``x`` whose thresholded network output differs from ``y``.

    Every output is thresholded at 0.5 (strictly greater -> 1.0, otherwise
    0.0) and a row counts as a miss unless the whole thresholded vector
    equals the target row.  This is stricter than comparing arg-max classes:
    a row with no output above 0.5, or with several, is always a miss.

    Args:
        x: Inputs, one example per row.
        y: Targets, row-aligned with ``x``.

    Returns:
        The number of missed rows (a count, not a percentage).
    """
    _, outputs = feedforward(x)
    # Vectorised replacement for the nested map()/cmp() calls: same
    # criterion, but evaluated in NumPy instead of one Python call per value.
    predicted = np.where(np.asarray(outputs) > 0.5, 1.0, 0.0)
    wrong_rows = np.any(predicted != np.asarray(y), axis=1)
    return np.sum(wrong_rows)
# Test-set miss rate, in percent, recorded after every training iteration.
miss_x = np.zeros((iter, 1))
num_test_rows = len(y_test)

for j in range(iter):
    # ---- Forward pass on the current mini-batch ----
    hiddenActualInput = inputVector * hiddenWeights.T
    hiddenOutputVector = np.matrix(act_func_l1(np.asarray(hiddenActualInput)))
    outputActualInput = hiddenOutputVector * outputWeights.T
    outputVector = act_func_l1(np.asarray(outputActualInput))

    # Mean squared error on the mini-batch (a square is never negative, so
    # no abs() is needed).
    layer2_error2 = np.square(outputVector - targetVector)
    print("Error: " + str(np.mean(layer2_error2)))

    # ---- Evaluate on the whole test set ----
    m = miss(x_test, y_test)
    # miss() returns a count; convert it so the value that is printed and
    # plotted really is the percentage its label says it is.
    miss_rate = 100.0 * m / num_test_rows
    miss_x[j] = miss_rate
    print(str(j) + " - Misses (%): " + str(miss_rate))

    # Drop to a small step size once at most 2000 test rows are missed.
    if m <= 2000:
        learning_rate = 0.05

    # ---- Backward pass ----
    # der_act_func_l1 expects PRE-activations.  It used to be given the
    # activations themselves, which evaluates sigmoid'(sigmoid(z)) and yields
    # a wrong gradient.
    outputDelta = np.mat(
        der_act_func_l1(np.asarray(outputActualInput))
        * np.asarray(outputVector - targetVector))
    hiddenDelta = np.mat(
        der_act_func_l1(np.asarray(hiddenActualInput))
        * np.asarray(outputDelta * outputWeights))

    # ---- Gradient-descent step ----
    # Both deltas were computed from the old weights above, so the order of
    # these two updates does not matter.
    hiddenWeights = np.mat(
        hiddenWeights.T
        - (learning_rate * np.asarray(inputVector.T * hiddenDelta))).T
    outputWeights = np.mat(
        outputWeights.T
        - (learning_rate * np.asarray(hiddenOutputVector.T * outputDelta))).T

    # Fresh mini-batch for the next iteration.
    inputVector, targetVector = getSample(sample_size, x_train, y_train)

plt.plot(range(iter), miss_x, label='Miss rate(%)')
plt.legend(loc='upper right')
plt.show()