我的代码可以正常运行,但即使训练了五十个 epoch(轮),准确率仍然保持不变。我的学习率是0.01。有人能告诉我哪里做错了吗?大部分代码来自 Michael Nielsen 的《神经网络与深度学习》。代码用于识别两个数字,因此我使用两个输出神经元,并将标签转换为 one-hot(独热)向量。该类通过以下方式实例化并训练:
nn = MultiLayerPerceptron([784,100,50,2])
Acc =nn.fit(X_train,y_train,X_test,y_test,0.1)
import numpy as np
from sklearn.utils import shuffle
def sigmoid(X):
    """Return the logistic sigmoid 1 / (1 + exp(-X)), element-wise.

    X may be a scalar or anything ``np.asarray`` accepts. A scalar input
    yields a scalar result; an array input yields an array of the same shape.

    The value is computed from exp(-|X|), whose argument is never positive,
    so large-magnitude inputs saturate to 0 or 1 without overflowing.
    """
    X = np.asarray(X)
    decay = np.exp(-np.abs(X))
    # Both branches are algebraically equal to 1 / (1 + exp(-X)); the second
    # form is used for negative X so that exp is never called on a large
    # positive number.
    result = np.where(X >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with an empty tuple unwraps a 0-d result to a NumPy scalar and
    # leaves arrays of higher rank unchanged.
    return result[()]
def sigmoid_d(X):
    """Return the derivative of the logistic sigmoid evaluated at X.

    Uses the identity s'(x) = s(x) * (1 - s(x)), where s is ``sigmoid``.
    """
    s = sigmoid(X)
    return s * (1.0 - s)
def Relu(X):
    """Return the rectified linear unit max(0, X), element-wise.

    X may be a scalar or a NumPy array. ``np.maximum`` is used instead of the
    builtin ``max`` because the builtin cannot compare 0 against an array
    that holds more than one element.
    """
    return np.maximum(0, X)
class MultiLayerPerceptron(object):
    """Fully connected feed-forward network with sigmoid units.

    Training uses mini-batch gradient descent on the quadratic cost, with the
    gradient of each sample obtained by backpropagation. Samples are processed
    as column vectors, and labels are one-hot vectors with one entry per
    output neuron. Predictions are the index of the largest output activation.

    Relies on the module-level ``sigmoid`` function and, in ``fit``, on
    ``shuffle`` imported at the top of the file.
    """

    def __init__(self, layers, alpha=0.001):
        """Create randomly initialised biases and weights.

        layers -- sequence of layer sizes, input layer first,
                  e.g. ``[784, 100, 50, 2]``.
        alpha  -- stored as ``self.alpha``. ``fit`` takes its own ``alpha``
                  argument and does not read this attribute.
        """
        self.layers = layers
        self.num_layers = len(layers)
        self.input_layer_size = layers[0]
        self.output_layer_size = layers[-1]
        self.alpha = alpha
        # One bias column vector per non-input layer.
        self.biases = [np.random.randn(n_out, 1) for n_out in layers[1:]]
        # weights[k] has shape (size of layer k+1, size of layer k). Dividing
        # by sqrt(fan_in) keeps the weighted input of a wide layer at roughly
        # unit scale; unscaled N(0, 1) weights push sigmoid units into their
        # flat regions, where gradients are close to zero.
        self.weights = [
            np.random.randn(n_out, n_in) / np.sqrt(n_in)
            for n_in, n_out in zip(layers[:-1], layers[1:])
        ]

    def feedforward(self, X):
        """Return the output-layer activations for the input ``X``.

        ``X`` is multiplied from the left by each weight matrix in turn, so a
        single sample has to be a column vector of shape (input size, 1).
        """
        activation = X
        for b, w in zip(self.biases, self.weights):
            activation = sigmoid(np.dot(w, activation) + b)
        return activation

    def backprop(self, x, y):
        """Return ``(nabla_b, nabla_w)``, the quadratic-cost gradient for one sample.

        x -- one input sample; reshaped to a column vector.
        y -- one-hot label; reshaped to ``(self.output_layer_size, 1)``, which
             raises ValueError when its size does not match the output layer.

        ``nabla_b`` and ``nabla_w`` are lists of arrays with the same shapes
        as ``self.biases`` and ``self.weights``.
        """
        x = np.asarray(x).reshape(-1, 1)
        y = np.asarray(y).reshape(self.output_layer_size, 1)

        # Forward pass, keeping every layer's activation for the backward pass.
        activations = [x]
        for b, w in zip(self.biases, self.weights):
            activations.append(sigmoid(np.dot(w, activations[-1]) + b))

        nabla_b = [None] * len(self.biases)
        nabla_w = [None] * len(self.weights)

        # Backward pass. The sigmoid derivative is a * (1 - a), so it is taken
        # from the stored activations rather than recomputed from the
        # weighted inputs.
        output = activations[-1]
        delta = (output - y) * (output * (1.0 - output))
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # depth counts layers from the end: 2 is the last hidden layer.
        for depth in range(2, self.num_layers):
            act = activations[-depth]
            delta = np.dot(self.weights[-depth + 1].transpose(), delta) * (act * (1.0 - act))
            nabla_b[-depth] = delta
            nabla_w[-depth] = np.dot(delta, activations[-depth - 1].transpose())
        return (nabla_b, nabla_w)

    def predict(self, test_data):
        """Return a list with the predicted class index of every sample.

        Each sample is reshaped to a column vector and fed forward; the
        prediction is the index of the largest output activation.
        """
        return [
            np.argmax(self.feedforward(np.asarray(sample).reshape(-1, 1)))
            for sample in test_data
        ]

    def calAccuracy(self, C, D):
        """Return the fraction of predictions in ``C`` that match labels ``D``.

        C -- 1-D sequence of predicted class indices.
        D -- 2-D array of one-hot labels, one row per sample. The true class
             of a row is taken as the index of its largest entry, so any
             number of classes is supported.

        Only the first ``len(C)`` rows of ``D`` are compared. Returns 0.0 when
        ``C`` is empty. Raises ValueError when ``D`` has fewer rows than ``C``.
        """
        predicted = np.asarray(C)
        total = predicted.shape[0]
        if total == 0:
            return 0.0
        labels = np.asarray(D)
        if labels.shape[0] < total:
            raise ValueError("fewer labels than predictions")
        actual = np.argmax(labels[:total], axis=1)
        return float(np.mean(predicted == actual))

    def fit(self, X, y, test_data, test_label, alpha=0.001, iters=200, batch_size=1000):
        """Train with mini-batch gradient descent and return test accuracies.

        X, y       -- training samples and their one-hot labels.
        test_data, test_label -- held-out samples and one-hot labels used to
                      measure accuracy.
        alpha      -- learning rate; each update uses the mean gradient of the
                      mini-batch multiplied by ``alpha``.
        iters      -- number of passes (epochs) over the training data.
        batch_size -- number of samples per mini-batch; the last batch of an
                      epoch may be smaller.

        Accuracy on the test set is measured after every second epoch. The
        return value is the list of those accuracies in chronological order.

        Sigmoid units saturate on large-magnitude inputs, so features are best
        scaled to a small range (for example [0, 1]) before training.
        """
        data = X
        Accuracy = []
        for epoch in range(iters):
            data, y = shuffle(data, y)
            for start in range(0, len(data), batch_size):
                batch_x = data[start:start + batch_size]
                batch_y = y[start:start + batch_size]
                grad_b = [np.zeros(b.shape) for b in self.biases]
                grad_w = [np.zeros(w.shape) for w in self.weights]
                # Sum the per-sample gradients over the mini-batch.
                for sample, label in zip(batch_x, batch_y):
                    del_b, del_w = self.backprop(sample, label)
                    for acc, part in zip(grad_b, del_b):
                        acc += part
                    for acc, part in zip(grad_w, del_w):
                        acc += part
                # Dividing by the batch length turns the sum into a mean.
                step = float(alpha) / len(batch_x)
                self.weights = [w - step * g for w, g in zip(self.weights, grad_w)]
                self.biases = [b - step * g for b, g in zip(self.biases, grad_b)]
            print("epoch : %d" % epoch)
            if (epoch + 1) % 2 == 0:
                predictions = np.array(self.predict(test_data))
                Accuracy.append(self.calAccuracy(predictions, test_label))
                print(Accuracy)
        return Accuracy