用于MNIST的简单神经网络,误分类率始终停留在17%左右

时间:2016-07-21 16:31:42

标签: python machine-learning neural-network backpropagation mnist

你好,我把代码贴出来请大家检查。几个星期以来,我一直在用Python实现自己的神经网络,但似乎无法把误分类率降到16%–17%以下。我尝试过不同的学习率和不同的隐藏神经元数量,仍然没有明显的改进。我很清楚我的实现只是最基本的传统神经网络,但根据我在互联网上看到的其他实现,我期待能得到更好的结果。希望你们对此感兴趣:如果你能指出我的代码中可能存在的问题,或者提供新的思路,那就太好了;如果你认为这已经是传统实现所能达到的最好结果、我应该加入新的东西,也请告诉我。

无论如何,这是我的代码。我希望它足够可读;我尽量把它写得简单,因为这是我理解神经网络工作原理的方式。

编辑:也许我的问题表述得不够清楚。基本上,我希望你们(如果感兴趣的话)帮我找出当前实现中哪些细节可以改进,从而把误分类率降到17%以下,因为这显然是我目前的实现所能达到的最好结果。我非常感谢任何建议或想法。我对这个主题非常感兴趣,但我还是个初学者;如果有好的想法能帮助我改进实现,那就太棒了。

文件:mnist_dataset.py - 提取mnist数据

import numpy as np
from struct import unpack

# Open the four raw MNIST IDX files in binary mode. All paths are relative to
# the working directory; the handles are consumed and closed by
# getMNISTData().
train_input_file = open("dataset/train-images-idx3-ubyte", "rb")
# The original path started with "/" (an absolute path), unlike its siblings.
train_output_file = open("dataset/train-labels-idx1-ubyte", "rb")
test_input_file = open("dataset/t10k-images-idx3-ubyte", "rb")
test_output_file = open("dataset/t10k-labels-idx1-ubyte", "rb")


def readData(f, labels=False, scale=1):
    """Read one IDX-formatted file into a 2-D float array.

    The header is a sequence of big-endian unsigned 32-bit integers: a magic
    number, the item count and, for image files only, the row and column
    counts. The payload that follows is one unsigned byte per value.

    Args:
        f: Binary file object positioned at the start of the file.
        labels: True when the file holds labels (no row/column header
            fields, one byte per item).
        scale: Only the first ``int(num / scale)`` items are read.

    Returns:
        Float array of shape ``(int(num / scale), rows * cols)``; the second
        dimension is 1 for label files.

    Raises:
        IndexError: if the file ends inside the header.
        ValueError: if the file holds fewer payload bytes than requested.
    """
    def read_u32():
        # Decode the big-endian header field directly instead of reading a
        # native int32 and re-interpreting its bytes with struct.unpack.
        return int(np.fromfile(f, dtype='>u4', count=1)[0])

    read_u32()  # magic number; read to advance the file, not validated
    num = read_u32()
    row = 1
    col = 1
    if not labels:
        row = read_u32()
        col = read_u32()

    count = int(num / scale)
    width = col * row
    # One bulk read replaces the per-row loop of the original.
    raw = np.fromfile(f, dtype=np.ubyte, count=count * width)
    return raw.reshape(count, width).astype(float)


def getMNISTData():
    """Load the MNIST train/test sets from the module-level file handles.

    Pixel values are divided by 255.0; labels are returned as read by
    readData(). The shapes of the four arrays are printed.

    The four module-level file handles are closed before returning (also
    when reading fails), so this function can only be called once per
    import of the module.

    Returns:
        Tuple ``(train_input, train_out, test_input, test_out)``.
    """
    try:
        train_input = readData(train_input_file, scale=1) / 255.0
        train_out = readData(train_output_file, True, scale=1)
        test_input = readData(test_input_file) / 255.0
        test_out = readData(test_output_file, True)
    finally:
        train_input_file.close()
        train_output_file.close()
        test_input_file.close()
        test_output_file.close()

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Train input: " + str(train_input.shape))
    print("Train output: " + str(train_out.shape))
    print("Test input: " + str(test_input.shape))
    print("Test output: " + str(test_out.shape))

    return (train_input, train_out, test_input, test_out)

文件:NN.py - 神经网络实现

import mnist_dataset
import numpy as np
import random
import matplotlib.pyplot as plt


def encode_data_10(v):
    """Return the one-hot encoding of digit *v* as a nested list.

    The result is a list holding a single 10-element row of floats that is
    0.0 everywhere except for 1.0 at index ``int(v)``.
    """
    one_hot = np.zeros((1, 10), dtype=float)
    one_hot[0, int(v)] = 1.0
    return one_hot.tolist()

def encode_data_1(v):
    """Map a label *v* linearly to ``-1.0 + 0.2 * v`` (0 -> -1.0, 5 -> 0.0)."""
    return (0.2 * v) + -1.0

# Load the images (already scaled by getMNISTData) and the raw digit labels.
x_train, y_train, x_test, y_test = mnist_dataset.getMNISTData()

# Hyper-parameters of the training run.
learning_rate = 1.0
iter = 3000  # number of mini-batch updates (name shadows the builtin iter)
sample_size = 30  # rows per mini-batch
num_hidden_neurons = 500
num_output_neurons = 10

# Encode the labels to match the output layer. List comprehensions are used
# instead of passing map() straight to numpy, because map() only returns a
# list on Python 2; on Python 3 numpy would receive an opaque iterator.
if num_output_neurons > 1:
    # One row of ten 0.0/1.0 values per label.
    y_train = np.matrix(np.array([encode_data_10(label) for label in y_train]))
    y_test = np.matrix(np.array([encode_data_10(label) for label in y_test]))
else:
    # Single output: label mapped linearly by encode_data_1.
    y_train = np.matrix([encode_data_1(label) for label in y_train])
    y_test = np.matrix([encode_data_1(label) for label in y_test])


def getSample(sample_size, x, y):
    """Draw a random mini-batch of matching rows from *x* and *y*.

    Rows are chosen without replacement using the ``random`` module.

    Args:
        sample_size: Number of rows to draw; must not exceed ``len(y)``.
        x: 2-D array-like of inputs, one row per example.
        y: 2-D array-like of targets, one row per example.

    Returns:
        Tuple ``(x_r, y_r)`` of float ndarrays with ``sample_size`` rows each.

    Raises:
        ValueError: if ``sample_size`` is larger than ``len(y)``.
    """
    # Sample from every row index. The original range started at 1, so the
    # first example could never be selected.
    picked = np.array(random.sample(range(len(y)), sample_size), dtype=int)
    # Index-array selection copies the chosen rows in one step; asarray
    # turns np.matrix inputs into plain ndarrays, as the original returned.
    x_r = np.asarray(x)[picked].astype(float)
    y_r = np.asarray(y)[picked].astype(float)
    return (x_r, y_r)

# First mini-batch; the training loop draws a fresh one after every update.
inputVector, targetVector = getSample(sample_size, x_train, y_train)

# Weight initialisation.
# The original drew every weight from [0, 1). With non-negative inputs each
# pre-activation is then a large positive sum, which puts the sigmoids in
# their flat region from the first step. Zero-centred weights scaled by
# 1/sqrt(fan_in) keep the initial pre-activations small.
num_inputs = x_train.shape[1]
hidden_limit = 1.0 / np.sqrt(num_inputs)
output_limit = 1.0 / np.sqrt(num_hidden_neurons)

# hiddenWeights: one row per hidden neuron, one column per input value.
hiddenWeights = np.mat(np.random.uniform(-hidden_limit, hidden_limit,
                                         (num_hidden_neurons, num_inputs)))
print("W0 shape: " + str(hiddenWeights.shape))
# outputWeights: one row per output neuron, one column per hidden neuron.
outputWeights = np.mat(np.random.uniform(-output_limit, output_limit,
                                         (num_output_neurons, num_hidden_neurons)))
print("W1 shape: " + str(outputWeights.shape))


def act_func_l1(a):
    """Logistic sigmoid ``1 / (1 + exp(-a))``, applied via numpy to *a*."""
    decay = np.exp(-a)
    return 1.0 / (decay + 1)

def der_act_func_l1(a):
    """Return ``s * (1 - s)`` with ``s = act_func_l1(a)``.

    This is the sigmoid derivative evaluated at *a*, so *a* is treated as a
    pre-activation value: the sigmoid is applied to it inside this function.
    """
    activation = act_func_l1(a)
    return activation * (1.0 - activation)


def feedforward(l0):
    """Run a forward pass through both layers for the rows of *l0*.

    Reads the module-level ``hiddenWeights`` and ``outputWeights``.

    Returns:
        Tuple ``(layer1, layer2)``: the hidden activations as an
        ``np.matrix`` and the output activations as an ndarray.
    """
    # Hidden layer: weighted sum followed by the sigmoid.
    hidden_pre = l0 * hiddenWeights.T
    hidden_out = np.matrix(act_func_l1(np.asarray(hidden_pre)))

    # Output layer: same pattern, left as a plain ndarray.
    output_pre = hidden_out * outputWeights.T
    output_out = act_func_l1(np.asarray(output_pre))

    return (hidden_out, output_out)

def miss(x, y):
    """Count the rows of *x* that the network classifies incorrectly.

    Each network output is thresholded at 0.5 (strictly greater -> 1.0,
    otherwise 0.0). A row counts as a miss unless its whole thresholded
    output equals the corresponding row of *y*.

    Args:
        x: Inputs, one example per row, as accepted by feedforward().
        y: Expected outputs, one row per example.

    Returns:
        The number of mismatching rows (a count, not a percentage).
    """
    _, outputs = feedforward(x)

    # Vectorised form of the original per-element map/cmp loops, which
    # relied on the Python 2-only cmp() builtin.
    predicted = (np.asarray(outputs) > 0.5).astype(float)
    targets = np.asarray(y, dtype=float)

    # NOTE(review): exact match after thresholding is at least as strict as
    # comparing argmax, so this count can exceed the argmax error count.
    return np.sum(np.any(predicted != targets, axis=1))

# Mini-batch gradient-descent training. miss_x records, per iteration, the
# miss count measured on the test set.
miss_x = np.zeros((iter, 1))
for j in range(iter):
    # Forward pass on the current mini-batch (same computation that
    # feedforward() performs with the current weights).
    hiddenOutputVector, outputVector = feedforward(inputVector)

    # Mean squared error of this mini-batch, for logging only.
    output_error = np.asarray(outputVector - targetVector)
    print("Error: " + str(np.mean(np.abs(np.square(output_error)))))

    # NOTE(review): the full test set is evaluated every iteration and its
    # result also decides the learning rate below.
    m = miss(x_test, y_test)
    miss_x[j] = m
    print(str(j) + " - Misses (%): " + str(m))
    if m <= 2000:
        learning_rate = 0.05

    # Backward pass.
    # NOTE(review): der_act_func_l1 receives activations that are already
    # sigmoid outputs, so it evaluates sigmoid'(sigmoid(z)), not sigmoid'(z).
    # Kept exactly as in the original to preserve behaviour.
    outputDelta = np.mat(der_act_func_l1(np.asarray(outputVector)) * output_error)
    back_error = np.asarray(outputDelta * outputWeights)
    hiddenDelta = np.mat(der_act_func_l1(np.asarray(hiddenOutputVector)) * back_error)

    # Gradient step; gradients are summed (not averaged) over the batch.
    hiddenWeights = hiddenWeights - learning_rate * (inputVector.T * hiddenDelta).T
    outputWeights = outputWeights - learning_rate * (hiddenOutputVector.T * outputDelta).T

    # Draw the next mini-batch.
    inputVector, targetVector = getSample(sample_size, x_train, y_train)

# Plot the recorded miss counts over the iterations.
plt.plot(range(iter), miss_x, label = 'Miss rate(%)')
plt.legend(loc='upper right')
plt.show()

0 个答案:

没有答案