Question

我有一个包含5个单位的隐藏层，有10个单位的输入层和一个标量输出单位。我正在使用ReLu激活，并且在输出层没有非线性，只有加权和。与其使用网络上的现有代码，我还想导出方程式。融合确实令人困惑，我很确定这是错误的。

import numpy as np
import matplotlib.pyplot as plt
import math

d = 10
m = 5
alp = 1e-2
W1 = np.random.randn(m,d)
W2 = np.random.randn(1,m)
a0 = np.random.randn(d,1)
b1 = np.random.randn(m,1)
b2 = np.random.randn(1,1)
y = np.random.randn(1,1)

def compute_loss(y,a2):
       return np.sum(np.power(y-a2,2))/2

def gradient_step(W1,W2,b1,b2,a1,a2,z1):
    W2 += alp*(y-a2)*a1.transpose()
    b2 += (y-a2)
    a1_deriv = np.array(reluDerivative(z1))
    b1 += (y-a2)*(np.matmul(W2,np.diagflat(a1_deriv))).transpose()
    W1 += (y-a2)*(a0.dot(W2).dot(np.diagflat(a1_deriv))).transpose()
    return W1,W2,b1,b2,a1,a2,z1

def reluDerivative(x):
     x[x<=0] = 0
     x[x>0] = 1
     return x

loss_vec = []
num_iterations = 50

for i in range(num_iterations):
    z1 = np.matmul(W1,a0)+b1
    a1 = np.maximum(0,z1)
    a2 = np.matmul(W2,a1)+b2
    loss_vec.append(compute_loss(y,a2))
    W1,W2,b1,b2,a1,a2,z1 = gradient_step(W1,W2,b1,b2,a1,a2,z1)
plt.plot(loss_vec)

浅层神经网络中的收敛

0 个答案: