我正在做一个学校项目,并且坚持使用现有的前向支撑结构在Numpy中实施反向传播。该脚本的目的是仅使用numpy来创建简单的动态(意味着任意数量的层和节点)完全连接的网络。
我认为我必须找到激活函数的导数,并将其乘以原始误差以及我向后移动时遇到的每个激活函数的导数。
但是,我在弄清楚如何在脚本中正确实现这一点时遇到了麻烦。
如果有人可以用英语解释英语,考虑到此处设置的复杂性,甚至可以为处理动态尺寸反向传播的视频/帖子提供建议,那将是一个很大的帮助。
现在,所有权重和偏差都存储在列表中,以供将来使用反向支持,而使用反向支持功能中的少量代码,我能够获得每个输出的错误。
此代码块
#initialize a test model w/ 128 bacth and lr of 0.01
model = Model(128, 0.01)
#simple x data input
X = np.array([[1,1],[0,0],[12,5]])
Y = np.array([[1],[0],[-1]])
#adding 4 layers
z = model.add(X, 3, "sigmoid")
z = model.add(z, 1, "sigmoid", output=True)
#this is a full forward pass through the layers
z = model.predict(X)
print(z)
#this is the error of the predictions
print(model.backprop(z, Y))
输出以下向量:
[[0.50006457]
[0.50006459]
[0.50006431]]
[[0.24993544]
[0.2500646 ]
[2.25019293]]
就像我说的那样,不确定如何从此处向前(或向后;))。
下面是运行该示例所需的完整脚本:
import math
import numpy as np
#everything below is defining activation functions
#--------------------------------------------------------------------------------------------
def b_relu(input):
return max((0, max(input)))
def bd_relu(input):
if(input < 0 or input == 0):
return 0
else:
return 1
def b_sigmoid(x):
return 1 / (1 + math.exp(-x))
def bd_sigmoid(input):
return sigmoid(input) * (1 - sigmoid(input))
def b_tanh(input):
top = (math.exp(input) - math.exp(-input))
bottom = (math.exp(input) + math.exp(-input))
return (top/bottom)
#helper functions for tanh
def cosh(input):
return ((math.exp(input) + math.exp(-input)) / 2)
def sinh(input):
return ((math.exp(input) - math.exp(-input)) / 2)
def bd_tanh(input):
top = (math.pow(cosh(input), 2) - math.pow(sinh(input), 2))
bottom = math.pow(input, 2)
return (top / bottom)
def b_softmax(z):
# subracting the max adds numerical stability
shiftx = z - np.max(z,axis=1)[:,np.newaxis]
exps = np.exp(shiftx)
return exps / np.sum(exps,axis=1)[:,np.newaxis]
def bd_softmax(Y_hat, Y):
return Y_hat - Y
def b_linear(input):
return input
def bd_linear(input):
return 1
#vectorizing the activation and deriv. activation functions
relu = np.vectorize(b_relu)
d_relu = np.vectorize(bd_relu)
sigmoid = np.vectorize(b_sigmoid)
d_sigmoid = np.vectorize(bd_sigmoid)
tanh = np.vectorize(b_tanh)
d_tanh = np.vectorize(bd_tanh)
softmax = np.vectorize(b_softmax)
d_softmax = np.vectorize(bd_softmax)
linear = np.vectorize(b_linear)
d_linear = np.vectorize(bd_linear)
class Model:
def __init__(self, batch, lr):
#initializing self lists to keep track of stuff for bacthes, forward prop & backporp
self.batch = batch
self.lr = lr
self.W = []
self.B = []
self.A = []
self.Z = []
self.X = []
self.layers = []
self.tempW = []
self.tempB = []
#store error for backprop
self.output_error = []
#initialize the weights during 'model.add' so we can test our network shapes dynamically w/out model.compile
#added an output bool here so we can make sure the shape of the output network is (1,n)
def initial_weights(self, input_data, output_shape, output=False):
B = np.zeros((1, output_shape))
#assigning the shape
W = np.random.uniform(-1e-3, 1e-3, size = (input_data.shape[len(input_data.shape) - 1], output_shape))
self.B.append(B)
self.W.append(W)
def add(self, input_data, output_shape, activation, output=False):
#append to layers so we have a correct index value
self.layers.append(69)
#making sure our data in a numpy array
if (type(input_data) == np.ndarray):
X = input_data
else:
X = np.asarray(input_data)
#adding data and activations to self lists
self.X.append(X)
self.A.append(activation)
#keep track of our index & initializing random weights for dynamic comatibility testing
index = len(self.layers)-1
self.initial_weights(input_data, output_shape, output=False)
X2 = self.forward(input_data, index)
#printing layer info
print("Layer:", index)
print("Input Shape: ", X.shape)
print("Weight Shape: ", self.W[index].shape)
print("Output Shape: ", X2.shape)
print(" ")
return(X2)
def forward(self, input_data, index):
#pulling weights and biases from main lists for operations
B = self.B[index]
W = self.W[index]
#matmul of data # weights + bias
Z = np.matmul(input_data, W) + B
#summing each row of inputs to activation node
for x in Z:
x = sum(x)
#pulling activation from index
act = str(self.A[index])
#activating
Z = activate(Z, act)
#keeping track of Z i guess
self.Zappend = Z
return(Z)
def predict(self, input_data):
for x in range(len(self.layers)):
z = model.forward(input_data, x)
input_data = z
return z
def backprop(self, model_output, ground_truth):
#------------------------------
#now begins the backprop portion
#let's start with finding the error between predictions and actual values
#gonna do MSE to keep it simple
self.output_error = (ground_truth - model_output) ** 2
#so now we have the error of the output layer, this tells us two things, how wrong we were, and in which direction we should update
#the outputs of these nodes
'''
What to do if this was linear regression (for m & b)
1. Take the error and multiply it by the transpose of the last layer weights
(I think the error in this case is where the prime activation function should be if we had activations)
2. The last layer bias is just the error
3. The second to last layer inputs is the bias times the transpose of second layers weights
3. Then I have no idea
'''
return self.output_error