我的代码可以正常运行，但即使训练了五十个轮次（epoch），准确率仍然保持不变。我的学习率是 0.01。有人能告诉我哪里做错了吗？大部分代码来自 Michael Nielsen 的神经网络教材。这段代码用于识别两个数字，因此我使用了两个输出神经元，并将标签转换为独热（one-hot）向量。该类的使用方式如下：

# Usage example: build a network whose layer sizes are 784 -> 100 -> 50 -> 2.
# X_train, y_train, X_test and y_test are not defined in this listing.
nn = MultiLayerPerceptron([784,100,50,2]) 
# The fifth positional argument of fit() is `alpha`, so this trains with a
# learning rate of 0.1; fit() returns the list it names `Accuracy`.
Acc =nn.fit(X_train,y_train,X_test,y_test,0.1)

import numpy as np
from sklearn.utils import shuffle

def sigmoid(X):
    """Return the logistic function 1 / (1 + exp(-X)), element-wise.

    X may be a scalar, a sequence or a NumPy array. A scalar input yields a
    NumPy scalar; any other input yields an array of the same shape.

    The exponential is only ever taken of a non-positive number, so large
    negative inputs do not overflow the way a direct ``np.exp(-X)`` does.
    """
    values = np.asarray(X, dtype=float)
    # decay is always in [0, 1], so neither branch below can overflow.
    decay = np.exp(-np.abs(values))
    result = np.where(values >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with an empty tuple turns a 0-d array back into a scalar and
    # leaves arrays of higher rank unchanged.
    return result[()]

def sigmoid_d(X):
    """Return the derivative of the sigmoid at X, i.e. s * (1 - s) with s = sigmoid(X)."""
    s = sigmoid(X)
    return s * (1.0 - s)

def Relu(X):
    """Return the rectified linear unit max(0, X), element-wise.

    The built-in ``max`` raises on arrays with more than one element, so
    ``np.maximum`` is used; it accepts scalars and arrays alike. The result
    is a NumPy scalar for scalar input and an array otherwise.
    """
    return np.maximum(0, X)

class MultiLayerPerceptron(object):
    """Fully connected feed-forward network with sigmoid activations.

    Training is mini-batch gradient descent; the output-layer error term is
    ``(activation - y) * sigmoid_d(z)``, i.e. the gradient of a quadratic
    cost. The class relies on the module-level ``sigmoid`` and ``sigmoid_d``
    functions and on ``shuffle`` imported from ``sklearn.utils``.

    NOTE(review): several loop/if bodies were missing from the listing this
    class was restored from. The reconstructed parts are marked with
    NOTE(review) comments below and should be confirmed against the
    author's original code.
    """

    def __init__(self, layers, alpha=0.001):
        """Create a network with randomly initialised parameters.

        layers -- sequence of layer sizes, input layer first.
        alpha  -- stored on the instance as ``self.alpha``; ``fit`` takes its
                  own ``alpha`` argument and does not read this attribute.
        """
        self.layers = layers
        self.num_layers = len(layers)
        self.input_layer_size = layers[0]
        self.output_layer_size = layers[-1]
        self.alpha = alpha
        # One bias column vector per non-input layer.
        self.biases = [np.random.randn(size, 1) for size in layers[1:]]
        # weights[k] has shape (fan_out, fan_in). Dividing by sqrt(fan_in)
        # keeps the initial weighted sums small, so wide layers (e.g. 784
        # inputs) do not start with saturated sigmoids and near-zero gradients.
        self.weights = [
            np.random.randn(fan_out, fan_in) / np.sqrt(fan_in)
            for fan_in, fan_out in zip(layers[:-1], layers[1:])
        ]

    def feedforward(self, X):
        """Return the network output for X, a column vector of shape (n_inputs, 1)."""
        activation = X
        for b, w in zip(self.biases, self.weights):
            activation = sigmoid(np.dot(w, activation) + b)
        return activation

    def backprop(self, x, y):
        """Return ``(nabla_b, nabla_w)``, the per-layer gradients for one sample.

        x -- one input sample; it is flattened to a column vector.
        y -- the target vector; it is reshaped to (output_layer_size, 1).
        Both returned lists are laid out like ``self.biases`` / ``self.weights``.
        """
        x = np.asarray(x, dtype=float).reshape(-1, 1)
        # Use the configured output size instead of a hard-coded 2.
        y = np.asarray(y, dtype=float).reshape(self.output_layer_size, 1)

        nabla_b = [None] * len(self.biases)
        nabla_w = [None] * len(self.weights)

        # Forward pass, remembering every weighted input and activation.
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # Backward pass: output layer first.
        delta = (activations[-1] - y) * sigmoid_d(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())

        # l counts layers from the end: l == 2 is the last hidden layer.
        for l in range(2, self.num_layers):
            sp = sigmoid_d(zs[-l])
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
        return (nabla_b, nabla_w)

    def predict(self, test_data):
        """Return a list with the predicted class index for each sample.

        NOTE(review): the statement that fills ``predictions`` was missing;
        the index of the largest output activation is used, which matches
        what ``calAccuracy`` compares against.
        """
        predictions = []
        for sample in test_data:
            column = np.asarray(sample).reshape(-1, 1)
            predictions.append(int(np.argmax(self.feedforward(column))))
        return predictions

    def calAccuracy(self, C, D):
        """Return the fraction of entries of C that match the labels D.

        C -- predicted class indices.
        D -- true labels, either one-hot rows or a 1-D array of class indices.

        Raises ValueError when C and D differ in length and
        ZeroDivisionError when C is empty.

        NOTE(review): the original loop bodies were missing; a row whose
        first entry is 1 is decoded as class 0, generalised here to the
        position of the row maximum.
        """
        predicted = np.asarray(C)
        expected = np.asarray(D)
        if expected.ndim > 1:
            expected = np.argmax(expected, axis=1)
        if predicted.shape[0] != expected.shape[0]:
            raise ValueError("predictions and labels differ in length")
        correct = int(np.count_nonzero(predicted == expected))
        return correct / float(predicted.shape[0])

    def _update_mini_batch(self, batch_x, batch_y, alpha):
        """Take one gradient step using the mean gradient over a mini-batch."""
        if len(batch_x) == 0:
            return
        sum_b = [np.zeros(b.shape) for b in self.biases]
        sum_w = [np.zeros(w.shape) for w in self.weights]
        for x_d, y_d in zip(batch_x, batch_y):
            del_b, del_w = self.backprop(x_d, y_d)
            for total, grad in zip(sum_b, del_b):
                total += grad
            for total, grad in zip(sum_w, del_w):
                total += grad
        step = float(alpha) / len(batch_x)
        self.weights = [w - step * g for w, g in zip(self.weights, sum_w)]
        self.biases = [b - step * g for b, g in zip(self.biases, sum_b)]

    def fit(self, X, y, test_data, test_label, alpha=0.001, iters=200, batch_size=1000):
        """Train the network and return the recorded test accuracies.

        X, y       -- training samples and their target vectors.
        test_data, test_label -- evaluation set passed to ``predict`` and
                      ``calAccuracy``.
        alpha      -- learning rate used for every mini-batch step.
        iters      -- number of passes over the training data.
        batch_size -- number of samples per mini-batch.

        Returns a list with one accuracy value for every second epoch.

        NOTE(review): the line that records the accuracy was missing and has
        been reconstructed. Inputs are presumably raw pixel values; consider
        scaling them to [0, 1] before training -- confirm with the caller.
        """
        data = X
        accuracy_history = []
        for i in range(iters):
            # Reshuffle samples and labels together at the start of each epoch.
            data, y = shuffle(data, y)
            for start in range(0, len(data), batch_size):
                stop = start + batch_size
                self._update_mini_batch(data[start:stop], y[start:stop], alpha)
            print ("epoch :", i)

            if (i + 1) % 2 == 0:
                predictions = np.array(self.predict(test_data))
                accuracy_history.append(self.calAccuracy(predictions, test_label))
        return accuracy_history

