I am trying to understand backpropagation, so I put together some Python code, but it is not working properly. When I train with the XOR inputs/outputs, the error does not converge. But if I change the value of the last XOR output, it converges.

If I set some target output values greater than 1, the error converges to target - 1, which does not seem right.
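One thing worth noting about that second observation: the output layer in the code below uses a logistic sigmoid, which only produces values strictly between 0 and 1, so a target greater than 1 can never be matched and the residual error ends up near target - 1. A minimal standalone sketch of that bound (not part of the network code that follows):

import numpy as np

def sigmoid(z):
    # Logistic sigmoid: output is always strictly between 0 and 1.
    return 1.0 / (1.0 + np.exp(-z))

target = 2.0
for z in [0.0, 2.0, 10.0, 50.0]:
    out = sigmoid(z)
    # Even for huge pre-activations the output saturates just below 1,
    # so the residual error stays close to target - 1.
    print("z=%5.1f  output=%.6f  residual=%.6f" % (z, out, target - out))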
import numpy as np
import random

class neural_network():
    activation = [] #List with the activation values of each layer
    weightsIn = []
    weightsOut = []

    def __init__(self, sizeOfLayers):
        '''
        sizeOfLayers: Tuple with numbers of neurons of each layer
        (in, hidden, out)
        '''
        if len(sizeOfLayers) > 3:
            raise ValueError('Wrong number of layers')
        self.sizeOfLayers = sizeOfLayers
        for i in range(len(sizeOfLayers)):
            if i == 0:
                #input layer + bias
                self.activation.append(sizeOfLayers[i]*[0.0] + [0.0])
            else:
                self.activation.append(sizeOfLayers[i]*[0.0])
        # Wi = len(Hid) x len(IN)+1(bias)
        self.weightsIn = np.random.random((sizeOfLayers[1], sizeOfLayers[0] + 1))
        # Wo = len(OUT) x len(Hid)
        self.weightsOut = np.random.random((sizeOfLayers[2], sizeOfLayers[1]))

    def forward(self, X):
        '''
        X: vector of inputs
        '''
        #In + bias added to the activation vector
        self.activation[0] = np.vstack((np.array([X]).T, np.array([1])))
        #sum of (weights x in)
        self.sumHidden = self.weightsIn.dot(self.activation[0])
        #Activation of hidden layer
        self.activation[1] = (self.sigmoid(self.sumHidden))
        #sum of (out weights x activation of last layer)
        self.sumOut = self.weightsOut.dot(self.activation[1])
        #activation of output
        self.activation[2] = (self.sigmoid(self.sumOut))
        return self.activation[2].T

    def backPropagate(self, Y, trainRate = 0.1):
        '''
        Y: output target
        trainRate: learning rate
        '''
        if len(Y) != self.sizeOfLayers[2]:
            raise ValueError('Wrong number of inputs')
        #Calc of output delta
        error_o = Y.T - self.activation[2].T
        out_delta = self.sigmoidPrime(self.activation[2]) * error_o.T
        #Calc of hidden delta
        error_h = out_delta.T.dot(self.weightsOut)
        hiden_delta = self.sigmoidPrime(self.activation[1]) * error_h.T
        # update output weights
        change_o = self.activation[1] * out_delta.T
        for i in range(self.sizeOfLayers[2]):
            for j in range(self.sizeOfLayers[1]):
                self.weightsOut[i][j] = self.weightsOut[i][j] + trainRate*change_o[j][i]
        # update input weights
        change_h = self.activation[0] * hiden_delta.T
        for i in range(self.sizeOfLayers[1]):
            for j in range(self.sizeOfLayers[0]):
                self.weightsIn[i][j] = self.weightsIn[i][j] + trainRate*change_h[j][i]
        #Error
        return np.sum((Y.T - self.activation[2].T)**2)/0.5

    def sigmoid(self, z, derv = False):
        if derv == False:
            return 1/(1+np.exp(-z))

    def sigmoidPrime(self, z):
        return self.sigmoid(z)*(1-self.sigmoid(z))

    def train(self, target, trainRate = 0.001, it = 50000):
        for i in range(it):
            error = 0.0
            for t in target:
                inputs = np.array(t[0])
                targets = np.array([t[1]])
                self.forward(inputs)
                error = error + self.backPropagate(targets, trainRate)
nn = neural_network((2,6,1))

xor = [
    [[0,0], [0]],
    [[0,1], [1]],
    [[1,0], [1]],
    [[1,1], [0]] #If I change this to 1 it converges
]

nn.train(xor)
EDIT: I made the modifications DiegoStéfano suggested (thank you Diego), but the error still does not converge.
import numpy as np
import math
import random
from scipy.special import expit
from sklearn.preprocessing import normalize

class neural_network(object):
    activation = []
    weightsIn = []
    weightsOut = []

    def __init__(self, sizeOfLayers):
        '''
        sizeOfLayers: Tuple with numbers of neurons of each layer
        (in, hidden, out)
        '''
        self.sizeOfLayers = sizeOfLayers
        for i in range(len(sizeOfLayers)):
            self.activation.append(sizeOfLayers[i]*[0.0] + [0.0])
        self.weightsIn = np.random.normal(scale=0.1, size = (sizeOfLayers[1], sizeOfLayers[0] + 1))
        self.weightsOut = np.random.normal(scale=0.1, size = (sizeOfLayers[2], sizeOfLayers[1] + 1))

    def forward(self, X):
        '''
        X: vector of inputs
        '''
        #In + bias added to the activation vector
        self.activation[0] = np.vstack((np.array([X]).T, np.array([1])))
        #sum of (weights x in)
        self.sumHidden = self.weightsIn.dot(self.activation[0])
        #+ bias added to the hidden activation vector
        self.activation[1] = np.vstack((expit(self.sumHidden), np.array([1])))
        #sum of (out weights x activation of last layer)
        self.sumOut = self.weightsOut.dot(self.activation[1])
        #activation of output
        self.activation[2] = (expit(self.sumOut))
        return self.activation[2].T

    def backPropagate(self, X, Y, trainRate = 0.1):
        self.forward(X)
        #Calc of output delta
        error_o = Y - self.activation[2].T
        out_delta = self.sigmoidPrime(self.activation[2]) * error_o.T
        #Calc of hidden delta
        error_h = out_delta.T.dot(self.weightsOut)
        hiden_delta = self.sigmoidPrime(self.activation[1]) * error_h.T
        # update output weights
        change_o = self.activation[1] * np.transpose(out_delta)
        self.weightsOut = self.weightsOut + trainRate*change_o.T
        # update hidden weights
        change_h = self.activation[0].dot( hiden_delta[:-1].T)
        self.weightsIn = self.weightsIn + trainRate*change_h.T
        #error
        return np.sum((Y - self.activation[2].T)**2)*0.5

    def train(self, input_list, epochs):
        for epoch in range(epochs):
            ErrAcc = 0.0
            for inputs, targets in input_list:
                Err = self.backPropagate(np.array(inputs), np.array(targets), 0.2)
                ErrAcc = ErrAcc + Err
            if epoch % 1000 == 0:
                print 'Epoch =', epoch, 'ErrAcc =', ErrAcc

    def sigmoidPrime(self,x):
        return expit(x)*(1-expit(x))
nn = neural_network((2,10,1))

xor = [
    [[0,0], [0]],
    [[0,1], [1]],
    [[1,0], [1]],
    [[1,1], [0]] #If I change this to 1 it converges
]

nn.train(xor, 300000)
Answer (score 0):

Here are the modifications I made to your code to make it work:
Add a bias to the output neurons as well. Every neuron in the network should have one, because the bias decouples the activation from the origin: it shifts your activation function left or right, greatly improving the chances of successful learning.

Instead of np.random.random, which generates numbers in the interval [0.0, 1.0), initialize the weights with np.random.uniform to get uniform random floats in [-1.0, 1.0).

Center the input space around the origin (i.e. remove the mean) and normalize it; a small sketch of this preprocessing step follows below.
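As a minimal sketch of that last point (the helper name center_and_scale is just illustrative, not from the original answer):

import numpy as np

def center_and_scale(X):
    # Subtract the per-column mean, then divide by the per-column standard
    # deviation (guarding against constant columns).
    X = np.asarray(X, dtype=float)
    X = X - X.mean(axis=0)
    std = X.std(axis=0)
    std[std == 0] = 1.0
    return X / std

print(center_and_scale(np.array([[0, 0], [0, 1], [1, 0], [1, 1]])))
# The {0, 1} XOR inputs become {-1.0, +1.0}, matching the inputs used in the
# modified code at the end of this answer.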
Here is how to do the initialization:
for i in range(len(sizeOfLayers)):
    self.activation.append(sizeOfLayers[i]*[0.0] + [0.0])

self.weightsIn = np.random.uniform(-1,1,(sizeOfLayers[1], sizeOfLayers[0] + 1))
self.weightsOut = np.random.uniform(-1,1,(sizeOfLayers[2], sizeOfLayers[1] + 1))
Then you also have to stack the constant 1 (the bias input) onto the hidden activation in the forward function:
self.activation[1] = np.vstack((self.sigmoid(self.sumHidden), np.array([1])))
You may have to adjust the learning rate for it to work (around 0.5 worked for me). Also, your mean squared error calculation is wrong: you should multiply by 0.5, not divide by it.
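As a quick check of that error formula (a standalone sketch, not taken from the code above):

import numpy as np

def half_sse(target, output):
    # Half the sum of squared errors: multiply by 0.5, do not divide by it.
    target = np.asarray(target, dtype=float)
    output = np.asarray(output, dtype=float)
    return 0.5 * np.sum((target - output) ** 2)

print(half_sse([0.0], [0.8]))  # 0.32; dividing by 0.5 instead would report 1.28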
Here is your modified code:
import numpy as np
import random

class neural_network():
    activation = [] #List with the activation values of each layer
    weightsIn = []
    weightsOut = []

    def __init__(self, sizeOfLayers):
        '''
        sizeOfLayers: Tuple with numbers of neurons of each layer
        (in, hidden, out)
        '''
        if len(sizeOfLayers) > 3:
            raise ValueError('Wrong number of layers')
        self.sizeOfLayers = sizeOfLayers
        for i in range(len(sizeOfLayers)):
            #input layer + bias
            self.activation.append(sizeOfLayers[i]*[0.0] + [0.0])
        # Wi = len(Hid) x len(IN)+1(bias)
        self.weightsIn = np.random.uniform(-1,1,(sizeOfLayers[1], sizeOfLayers[0] + 1))
        # Wo = len(OUT) x len(Hid)
        self.weightsOut = np.random.uniform(-1,1,(sizeOfLayers[2], sizeOfLayers[1] + 1))

    def forward(self, X):
        '''
        X: vector of inputs
        '''
        #In + bias added to the activation vector
        self.activation[0] = np.vstack((np.array([X]).T, np.array([1])))
        #sum of (weights x in)
        self.sumHidden = self.weightsIn.dot(self.activation[0])
        #Activation of hidden layer (+ bias)
        self.activation[1] = np.vstack( ( self.sigmoid(self.sumHidden), np.array([1]) ) )
        #sum of (out weights x activation of last layer)
        self.sumOut = self.weightsOut.dot(self.activation[1])
        #activation of output
        self.activation[2] = (self.sigmoid(self.sumOut))
        return self.activation[2].T

    def backPropagate(self, Y, trainRate = 0.1):
        '''
        Y: output target
        trainRate: learning rate
        '''
        if len(Y) != self.sizeOfLayers[2]:
            raise ValueError('Wrong number of inputs')
        #Calc of output delta
        error_o = Y.T - self.activation[2].T
        out_delta = self.sigmoidPrime(self.activation[2]) * error_o.T
        #Calc of hidden delta
        error_h = out_delta.T.dot(self.weightsOut)
        hiden_delta = self.sigmoidPrime(self.activation[1]) * error_h.T
        # update output weights
        change_o = self.activation[1] * out_delta.T
        for i in range(self.sizeOfLayers[2]):
            for j in range(self.sizeOfLayers[1]):
                self.weightsOut[i][j] = self.weightsOut[i][j] + trainRate*change_o[j][i]
        # update input weights
        change_h = self.activation[0] * hiden_delta.T
        for i in range(self.sizeOfLayers[1]):
            for j in range(self.sizeOfLayers[0]):
                self.weightsIn[i][j] = self.weightsIn[i][j] + trainRate*change_h[j][i]
        #Error
        return np.sum((Y.T - self.activation[2].T)**2)*0.5

    def sigmoid(self, z, derv = False):
        if derv == False:
            return 1/(1+np.exp(-z))

    def sigmoidPrime(self, z):
        return self.sigmoid(z)*(1-self.sigmoid(z))

    def train(self, target, trainRate = 0.5, it = 50000):
        for i in range(it):
            error = 0.0
            for t in target:
                inputs = np.array(t[0])
                targets = np.array([t[1]])
                self.forward(inputs)
                error = error + self.backPropagate(targets, trainRate)
nn = neural_network((2,5,1))

xor = [
    [[-1.0, -1.0], [0]],
    [[-1.0,  1.0], [1]],
    [[ 1.0, -1.0], [1]],
    [[ 1.0,  1.0], [0]] #If I change this to 1 it converges
]

nn.train(xor)

for e in xor:
    nn.forward(e[0])
    print nn.activation[2]
Good luck!