我最近编写了用于在python中生成和训练人工神经网络的代码(使用反向传播)。这一切都很有效,但是这个问题困扰了我好几天了,我似乎无法找到原因:
如果集合仅包含1个输入/输出对,则我生成的网络仅减少数据集上的错误。例如。对于2个输入神经元,我的输入是[[1,1]],为简单起见,[[1,1]]作为输出。这很好用,错误减少了。但是如果我选择[[1,1],[0,0]]作为输入,则无论我让它工作多久,错误都会交替出现,或者有时只会出现2次。这是我的代码:
import numpy as np
from graphics import *
import math
class Neuron():
def __init__(self,input=0,output=0,activation_func_key="logistic",bias=False,inp=False):
self.b = bias
self.input = input + self.b
self.output = output
self.key = activation_func_key
self.inp = inp
def add_to_input(self,input_=0):
self.input += input_
def add_to_input(self,input_=0):
self.input += float(input_)
def get_output(self):
if self.b:
return 1
if self.inp:
return self.input
if self.key is "relu":
return np.maximum((self.input),0)
if self.key is "tanh":
return np.tanh(self.input)
if self.key is "softplus":
return np.log(1+np.e**(self.input))
if self.key is "logistic":
return 1/(1+np.e**-(self.input))
def reset(self):
self.input = 0
self.ouput = 0
class Network():
def __init__(self,weight_matrix,input_neuron_num,hidden_neuron_nums,ouput_neuron_num,activation_f="relu"):
self.weight_matrix = weight_matrix
self.neurons = [Neuron(activation_func_key=activation_f) for _ in range(weight_matrix[0].__len__())]
back_layers = 0
val = input_neuron_num
counter = 0
for i in range(weight_matrix[0].__len__()):
bias = False
inp = False
if i is val:
bias = True
if counter < hidden_neuron_nums.__len__():
val += hidden_neuron_nums[counter]+1
counter += 1
if i < input_neuron_num:
inp = True
self.neurons[i] = Neuron(activation_func_key=activation_f,inp=inp,bias=bias)
self.input_neuron_num = input_neuron_num
self.hidden_neuron_nums = hidden_neuron_nums
self.output_neuron_num = ouput_neuron_num
def propagate_forward(self,inputv,output_len):
for i in range(self.neurons.__len__()):
if i < self.input_neuron_num:
self.neurons[i].add_to_input(inputv[i])
else:
spalte = self.weight_matrix[:][i]
for j in range(spalte.__len__()):
#if i > j: #nur obere dreiecksmatrix
self.neurons[i].add_to_input(self.weight_matrix[j][i] * self.neurons[j].get_output())
output = []
for i in range(output_len):
output.append(self.neurons[self.weight_matrix[0].__len__()-i-1].get_output())
output.reverse()
return output
def reset(self):
for n in self.neurons:
n.reset()
def make_net(inp,hidden,out,activation_f="logistic"):
hsum = 0
for i in range(hidden.__len__()):
hsum += hidden[i]+1
sum = inp + 1 + hsum + out
weights = [[0 for _ in range(sum)] for _ in range(sum)]
neurons_till_this_layer = 0
for i in range(inp+1):
for j in range(hidden[0]):
weights[i][inp+j+1] = np.random.uniform(-1,1)
neurons_till_this_layer += inp+1
for i in range(hidden.__len__()-1):
for n in range(hidden[i]+1):
for m in range(hidden[i+1]):
weights[n+neurons_till_this_layer][m+neurons_till_this_layer+hidden[i]+1] = np.random.uniform(-1,1)
neurons_till_this_layer += hidden[i]+1
for i in range(hidden[hidden.__len__()-1]+1):
for j in range(out):
weights[i+neurons_till_this_layer][j+neurons_till_this_layer+hidden[hidden.__len__()-1]+1] = np.random.uniform(-1,1)
net = Network(weights,inp,hidden,out,activation_f=activation_f)
return net
def backpropagation(net,inputs,outputs,learning_rate=0.001,epsilon=0.1,draw=False,win=0):
if draw:
win = 0
if win is not 0:
win = win
else:
win = draw_window(net)
if inputs[0][:].__len__() is not net.input_neuron_num:
print("Error: Input length does not match! No learning could be done!")
return net
if outputs[0][:].__len__() is not net.output_neuron_num:
print("Error: Output length does not match! No learning could be done!")
return net
hsum = 0
for i in range(net.hidden_neuron_nums.__len__()):
hsum += net.hidden_neuron_nums[i]+1
outer_delta_w_matrix = [[0 for _ in range(net.input_neuron_num + 1 + hsum + net.output_neuron_num)] for _ in range(net.input_neuron_num + 1 + hsum + net.output_neuron_num)]
for N in range(inputs.__len__()):
net_output = net.propagate_forward(inputs[N],outputs[0].__len__())
delta_w_matrix = [[0 for _ in range(net.input_neuron_num + 1 + hsum + net.output_neuron_num)] for _ in range(net.input_neuron_num + 1 + hsum + net.output_neuron_num)]
delta_j_list = [float(0) for _ in range(net.input_neuron_num + 1 + hsum + net.output_neuron_num)]
for i in range(net.input_neuron_num + 1 + hsum + net.output_neuron_num):
j = net.input_neuron_num + 1 + hsum + net.output_neuron_num - i - 1
derivate = "error"
if net.neurons[j].key is "relu":
derivate = 0 if net.neurons[j].input <= 0 else 1
if net.neurons[j].key is "tanh":
derivate = (1 / (np.cosh(net.neurons[j].input) * np.cosh(net.neurons[j].input)))
if net.neurons[j].key is "softplus":
derivate = 1 / (1 + np.e ** -net.neurons[j].input)
if net.neurons[j].key is "logistic":
#print(" input: %f" % net.neurons[j].input)
derivate = (np.exp(net.neurons[j].input)) / (((np.exp(net.neurons[j].input)) + 1) ** 2)
#derivate = net.neurons[j].output * (1 - net.neurons[i].get_output())
if i < net.output_neuron_num:
delta_j_list[j] = derivate * (net.neurons[j].get_output() - float(outputs[N][net.output_neuron_num-i-1]))
else:
prev_delta_sum = 0
for x in range(net.weight_matrix[j][:].__len__()):
prev_delta_sum += (delta_j_list[j-x] * net.weight_matrix[j][x])
#print(net.neurons[j].input)
delta_j_list[j] = derivate * prev_delta_sum
for i in range(net.input_neuron_num + 1 + hsum + net.output_neuron_num):
for j in range(net.input_neuron_num + 1 + hsum + net.output_neuron_num):
if net.weight_matrix[i][j]:
delta_w_matrix[i][j] = -learning_rate * delta_j_list[j] * net.neurons[i].get_output()
for i in range(net.input_neuron_num + 1 + hsum + net.output_neuron_num):
for j in range(net.input_neuron_num + 1 + hsum + net.output_neuron_num):
outer_delta_w_matrix[i][j] += (delta_w_matrix[i][j] / (inputs.__len__()))
#print_matrix(net.weight_matrix)
#print()
#print("Completed cycle")
net.reset()
for i in range(net.input_neuron_num + hsum + net.output_neuron_num):
for j in range(net.input_neuron_num + hsum + net.output_neuron_num):
if net.weight_matrix[i][j]:
net.weight_matrix[i][j] = net.weight_matrix[i][j] + outer_delta_w_matrix[i][j]
if draw:
draw_net(net, win)
return net
def print_matrix(matrix):
a = matrix[0][:]
b = matrix[:][0]
for i in range(a.__len__()):
print()
for j in range(b.__len__()):
sys.stdout.write("% 1.7f " % matrix[i][j])
print()
def get_dataset_binary(dataset_length=1000,max=4):
rdm_nums = [np.random.randint(0,2**max) for _ in range(dataset_length)]
bin_nums = [0 for _ in range(rdm_nums.__len__())]
length = max
for i in range(rdm_nums.__len__()):
bin_nums[i] = "{0:b}".format(rdm_nums[i])
while bin_nums[i].__len__() < length:
bin_nums[i] = "0" + bin_nums[i]
r_value = [0 for _ in range(dataset_length)]
for i in range(dataset_length):
r_value[i] = list(bin_nums[i])
return r_value
def draw_window(net,autoflush=False):
width = 0
for i in range(net.hidden_neuron_nums.__len__()):
if net.hidden_neuron_nums[i] > width:
width = net.hidden_neuron_nums[i]
if net.input_neuron_num > width:
width = net.input_neuron_num
if net.output_neuron_num > width:
width = net.output_neuron_num
scale_factor = 0.7
counter = 0
r = 50*scale_factor
v_buf = 100*scale_factor
h_buf = 50*scale_factor
d1 = 20*scale_factor
d2 = 12*scale_factor
return GraphWin("Netzwerk", (2*h_buf)+(width*2*r)+(width*h_buf)-h_buf, 2 * v_buf + 2*r+v_buf + net.hidden_neuron_nums.__len__()*2*r + net.hidden_neuron_nums.__len__()*v_buf-v_buf + 2*r+v_buf, autoflush=autoflush)
def draw_net(net,win=0):
width = 0
for i in range(net.hidden_neuron_nums.__len__()):
if net.hidden_neuron_nums[i] > width:
width = net.hidden_neuron_nums[i]+1
if net.input_neuron_num > width:
width = net.input_neuron_num+1
if net.output_neuron_num > width:
width = net.output_neuron_num
scale_factor = 0.7
counter = 0
r = 50*scale_factor
v_buf = 100*scale_factor
h_buf = 50*scale_factor
d1 = 20*scale_factor
d2 = 12*scale_factor
if win is 0:
win = GraphWin("Netzwerk", (2*h_buf)+(width*2*r)+(width*h_buf)-h_buf, 2 * v_buf + 2*r+v_buf + net.hidden_neuron_nums.__len__()*2*r + net.hidden_neuron_nums.__len__()*v_buf-v_buf + 2*r+v_buf)
for i in range(net.weight_matrix.__len__()):
for j in range(net.weight_matrix.__len__()):
if net.weight_matrix[i][j] is not 0:
from_pos = get_neuron_pos(net,i,r,v_buf,h_buf,width)
to_pos = get_neuron_pos(net,j,r,v_buf,h_buf,width)
thickness = np.abs(net.weight_matrix[i][j]) * 10
weight = Polygon([Point(from_pos.x-thickness,from_pos.y), Point(from_pos.x+thickness,from_pos.y),Point(to_pos.x+thickness,to_pos.y), Point(to_pos.x-thickness,to_pos.y)])
if net.weight_matrix[i][j] > 0:
weight.setFill("firebrick4")
else:
weight.setFill("lightskyblue")
label = Text(Point((from_pos.x+to_pos.x)/2, (from_pos.y+to_pos.y)/2), "%0.5f" % net.weight_matrix[i][j])
label.setSize(np.maximum(int(10*scale_factor),5))
weight.draw(win)
label.draw(win)
hidden_neuron_num = 0
for i in range(net.hidden_neuron_nums.__len__()):
hidden_neuron_num += net.hidden_neuron_nums[i]+1
for i in range(net.input_neuron_num+1):
inp_neuron = Circle(Point( (win.width - ((net.input_neuron_num+1) * 2 * r + (net.input_neuron_num+1) * h_buf - h_buf))/2 + i*(2*r+h_buf) + r, v_buf+r),r)
inp_neuron.setFill("springgreen3")
if i is net.input_neuron_num:
inp_neuron.setFill("snow")
name = Text(Point((win.width - ((net.input_neuron_num+1) * 2 * r + (net.input_neuron_num+1) * h_buf - h_buf))/2 + i*(2*r+h_buf) + r, v_buf+r-d1), "%s" % ("Neuron(in) " + str(counter+1)))
if i is net.input_neuron_num:
name.setText("%s" % ("Neuron(bias) " + str(counter+1)))
label = Text(Point((win.width - ((net.input_neuron_num+1) * 2 * r + (net.input_neuron_num+1) * h_buf - h_buf))/2 + i*(2*r+h_buf) + r, v_buf+r+d2), "Output:\n%1.6f" % float(net.neurons[counter].get_output()))
name.setSize(np.maximum(int(10*scale_factor),5))
label.setSize(np.maximum(int(10*scale_factor),5))
inp_neuron.draw(win)
name.draw(win)
label.draw(win)
counter +=1
for i in range(net.hidden_neuron_nums.__len__()):
for j in range(net.hidden_neuron_nums[i]+1):
hidden_neuron = Circle(Point((win.width-((net.hidden_neuron_nums[i]+1)*2*r+(net.hidden_neuron_nums[i]+1)*h_buf-h_buf))/2+j*(2*r+h_buf)+r,(3*r+2*v_buf)+i*(2*r+v_buf)),r)
hidden_neuron.setFill("lavender")
if j is net.hidden_neuron_nums[i]:
hidden_neuron.setFill("snow")
name = Text(Point((win.width-((net.hidden_neuron_nums[i]+1)*2*r+(net.hidden_neuron_nums[i]+1)*h_buf-h_buf))/2+j*(2*r+h_buf)+r, (3*r+2*v_buf)+i*(2*r+v_buf)-d1), "%s" % ("Neuron " + str(counter+1)))
if j is net.hidden_neuron_nums[i]:
name.setText("%s" % ("Neuron(bias) " + str(counter+1)))
label = Text(Point((win.width-((net.hidden_neuron_nums[i]+1)*2*r+(net.hidden_neuron_nums[i]+1)*h_buf-h_buf))/2+j*(2*r+h_buf)+r, (3*r+2*v_buf)+i*(2*r+v_buf)+d2), "Output:\n%1.6f" % net.neurons[counter].get_output())
name.setSize(np.maximum(int(10*scale_factor),5))
label.setSize(np.maximum(int(10*scale_factor),5))
hidden_neuron.draw(win)
name.draw(win)
label.draw(win)
counter += 1
for i in range(net.output_neuron_num):
out_neuron = Circle(Point((win.width-(net.output_neuron_num*2*r+net.output_neuron_num*h_buf-h_buf))/2+i*(2*r+h_buf)+r, net.hidden_neuron_nums.__len__()*2*r + 2*r + net.hidden_neuron_nums.__len__()*v_buf + 2* v_buf + r),r)
out_neuron.setFill("darkgoldenrod3")
name = Text(Point((win.width-(net.output_neuron_num*2*r+net.output_neuron_num*h_buf-h_buf))/2+i*(2*r+h_buf)+r, net.hidden_neuron_nums.__len__()*2*r + 2*r + net.hidden_neuron_nums.__len__()*v_buf + 2* v_buf + r - d1), "%s" % ("Neuron(out) " + str(counter+1)))
label = Text(Point((win.width-(net.output_neuron_num*2*r+net.output_neuron_num*h_buf-h_buf))/2+i*(2*r+h_buf)+r, net.hidden_neuron_nums.__len__()*2*r + 2*r + net.hidden_neuron_nums.__len__()*v_buf + 2* v_buf + r + d2), "Output:\n%1.6f" % net.neurons[counter].get_output())
name.setSize(np.maximum(int(10*scale_factor),5))
label.setSize(np.maximum(int(10*scale_factor),5))
out_neuron.draw(win)
name.draw(win)
label.draw(win)
counter += 1
win.update()
win.wait_window()
def get_neuron_pos(net, n, r, v_buf, h_buf, width):
if n < net.input_neuron_num+1:
return Point(((2*h_buf)+(width*2*r)+(width*h_buf)-h_buf - ((net.input_neuron_num+1) * 2 * r + (net.input_neuron_num+1) * h_buf - h_buf)) / 2 + n * (2 * r + h_buf) + r, v_buf + r)
current = net.input_neuron_num+1
for h in range(net.hidden_neuron_nums.__len__()):
current += (net.hidden_neuron_nums[h]+1)
if n < current:
return Point(((2*h_buf)+(width*2*r)+(width*h_buf)-h_buf-((net.hidden_neuron_nums[h]+1)*2*r+(net.hidden_neuron_nums[h]+1)*h_buf-h_buf))/2+(n-(current-net.hidden_neuron_nums[h]-1))*(2*r+h_buf)+r,(3*r+2*v_buf)+h*(2*r+v_buf))
if n - current < net.output_neuron_num:
return Point(((2*h_buf)+(width*2*r)+(width*h_buf)-h_buf-((net.output_neuron_num)*2*r+(net.output_neuron_num)*h_buf-h_buf))/2+(n-current)*(2*r+h_buf)+r, net.hidden_neuron_nums.__len__()*2*r + 2*r + net.hidden_neuron_nums.__len__()*v_buf + 2* v_buf + r)
return -1
def backpropagation_(net,inputs,outputs,learning_rate=0.01,epsilon=0.1):
endlich_fertig = False
while not endlich_fertig:
net = backpropagation(net,inputs,outputs,learning_rate,epsilon)
endlich_fertig = True
error = 0
for i in range(inputs.__len__()):
output = net.propagate_forward(inputs[i], outputs[0][:].__len__())
for j in range(output.__len__()):
error += np.abs(output[j] - float(outputs[i][j]))
if error > epsilon:
endlich_fertig = False
print("Error: %f" % error)
return net
net = make_net(2,[3,2],2)
print_matrix(net.weight_matrix)
inputs = [[1,1],[0,0]]
outputs = inputs
draw_net(net)
trained_net = backpropagation_(net,inputs,outputs)
draw_net(trained_net)
编辑:输入完整代码,以便您可以轻松地在本地重现行为