手写神经网络权重不变

时间:2018-08-16 00:06:23

标签: python matrix neural-network artificial-intelligence mnist

from struct import unpack
import gzip
import numpy
from numpy import *
import matplotlib.pyplot as plt

learningRate = 0.1

def get_labeled_data(imagefile, labelfile):
    """Read input-vector (image) and target class (label, 0-9) and return
       it as list of tuples.
    """
    # Open the images with gzip in read binary mode
    images = gzip.open(imagefile, 'rb')
    labels = gzip.open(labelfile, 'rb')

    # Read the binary data

    # We have to get big endian unsigned int. So we need '>I'

    # Get metadata for images
    images.read(4)  # skip the magic_number
    number_of_images = images.read(4)
    number_of_images = unpack('>I', number_of_images)[0]
    rows = images.read(4)
    rows = unpack('>I', rows)[0]
    cols = images.read(4)
    cols = unpack('>I', cols)[0]

    # Get metadata for labels
    labels.read(4)  # skip the magic_number
    N = labels.read(4)
    N = unpack('>I', N)[0]

    if number_of_images != N:
        raise Exception('number of labels did not match the number of images')

    # Get the data
    x = zeros((N, rows, cols), dtype="float32")  # Initialize numpy array
    y = zeros((N, 1), dtype="uint8")  # Initialize numpy array
    for i in range(N):
        if i % 1000 == 0:
            print("i: %i" % i)
        for row in range(rows):
            for col in range(cols):
                tmp_pixel = images.read(1)  # Just a single byte
                tmp_pixel = unpack('>B', tmp_pixel)[0]
                x[i][row][col] = tmp_pixel
        tmp_label = labels.read(1)
        y[i] = unpack('>B', tmp_label)[0]
    return (x, y)

ld = get_labeled_data("C:/Users/XBGFD/Desktop/Programming/NeuralNetworks/HRR/train-images-idx3-ubyte.gz", "C:/Users/XBGFD/Desktop/Programming/NeuralNetworks/HRR/train-labels-idx1-ubyte.gz")
def sigmoid(x):
    return 1/(1+numpy.exp(-x))

def sigmoid_P(x):
    return sigmoid(x) * (1 - sigmoid(x))

def cost(i, t):
    return (i - t) ** 2

def cost_P(i, t):
    return 2 * (i - t)

# 10x28x28 - number x row x column
weights = numpy.random.random((10, 28, 28))
biases = numpy.random.random((10, 28, 28))
dr = 0
da = 0
for loopi in range(10000):
    r = numpy.random.randint(0, len(ld[0][0]))
    targets = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    image = ld[0][r]
    label = ld[1][r][0]
    # weighted 3D Matrix of [number][row][column]
    predictions = []
    wPredictions = []
    # average of predictions for each number
    avgPred = []
    avgPred2 = []
    img = list(image)
    for i in range(10):
        x = []
        y = []
        for b, w in zip(biases[i], weights[i]):
            x.append(sigmoid(numpy.dot(w, image) + b))
            y.append(numpy.dot(w, image) + b)
        predictions.append(x)
        avgPred.append(numpy.average(list(x)))
        avgPred2.append(numpy.average(list(y)))
    for i in range(10):
        sqError = cost(avgPred[i], targets[i])
        # derivative of the cost with respect to each of the weights and biases
        dc_dp = cost_P(avgPred[i], targets[i])
        dp_dz = sigmoid_P(avgPred2[i])

        #for b, w in zip(biases[i], weights[i]):
        for imgRow in range(28):
            for imgCol in range(28):
                dz_dw = image[imgRow][imgCol]
                dz_db = 1
                print("dc_dp: " + str(dc_dp) + "\ndp_dz: "+ str(dp_dz) + "\ndz_dw: " + str(dz_dw))
                dc_dw = dc_dp * dp_dz * dz_dw
                dc_db = dc_dp * dp_dz * dz_db
                dr = dc_dw
                weights[i][imgRow][imgCol] -= learningRate * dc_dw
                da = weights[i][imgRow][imgCol]
                biases[i][imgRow][imgCol] -= learningRate * dc_db


while True:
    big = 0
    intid = int(input())
    imag = ld[0][intid]
    for l in range(10):
        papa = []
        for b, w in zip(biases[i], weights[i]):
            papa.append(sigmoid(numpy.dot(w, imag) + b))
        lol = numpy.average(papa)
        if(lol > big):
            big = l
    print(str(dr) + " " + str(da))
    print(big)

权重没有变化,因为dp_dz始终为0,我不确定是什么原因造成的。我并不是说它们在变化,而只是很小的变化,它们实际上根本没有变化。我相信这通常与我的方法有关,但是我不确定我还能如何解决这个问题,因为我对神经网络非常陌生。任何帮助将不胜感激!

0 个答案:

没有答案