我正在尝试用 Python 编写循环神经网络(LSTM),但反向传播步骤没有正确计算梯度:用梯度检查进行验证时,相对误差通常在 1e-2 左右,而我找不出错误在哪里。任何帮助都将不胜感激。
def backward(cache, next, prob, target, model):
    """Backpropagate a single LSTM time step.

    Args:
        cache: Tuple ``(c_temp, hf, hu, ho, c, a, X, c_old)`` of forward-pass
            values for this step: candidate cell value, forget / update /
            output gate activations, new cell state, hidden state, the
            input that multiplies the gate weights, and the previous cell
            state.
        next: Tuple ``(a_next, c_next)`` with the gradients flowing back from
            the following time step into this step's hidden state and cell
            state. (The name shadows the builtin; it is kept so existing
            callers keep working.)
        prob: Predicted output probabilities; copied, never modified.
        target: Index into ``prob`` of the correct class.
        model: Tuple ``(wy, by, wf, bf, wu, bu, wo, bo, wc, bc)`` of weights
            and biases.

    Returns:
        ``(next, grad)`` where ``next`` is the ``(a_next, c_next)`` pair to
        hand to the previous time step and ``grad`` is the list
        ``[dwy, dby, dwf, dbf, dwu, dbu, dwo, dbo, dwc, dbc]``.

    Relies on module-level ``np``, ``tanh``, ``sigmoidGradient``,
    ``tanhGradient`` and ``hidden_size``.
    """
    # Biases are unpacked only to validate the tuple layout; their gradients
    # do not depend on their values.
    wy, _by, wf, _bf, wu, _bu, wo, _bo, wc, _bc = model
    c_temp, hf, hu, ho, c, a, X, c_old = cache
    a_next, c_next = next

    # Output layer: prob - one_hot(target), i.e. the logit gradient when
    # prob is a softmax output trained with cross-entropy (assumed).
    dy = np.copy(prob)
    dy[target] -= 1
    dwy = np.dot(dy, a.T)
    dby = dy

    # Hidden-state gradient: from this step's output plus the next step.
    dh = wy.T @ dy + a_next

    # Output gate.
    dho = sigmoidGradient(ho) * (tanh(c) * dh)

    # Cell state: through a = ho * tanh(c), plus the next step's cell path.
    # NOTE(review): every other *Gradient call here receives an activation
    # OUTPUT (hf, hu, ho, c_temp), but this one receives the raw cell state.
    # If tanhGradient(y) is defined as 1 - y**2, this should be
    # tanhGradient(tanh(c)) -- confirm against the helper's definition.
    dc = ho * dh * tanhGradient(c)
    dc = dc + c_next

    # Forget gate, update gate and candidate cell value.
    dhf = sigmoidGradient(hf) * (c_old * dc)
    dhu = sigmoidGradient(hu) * (c_temp * dc)
    dc_temp = tanhGradient(c_temp) * (hu * dc)

    # Parameter gradients and each branch's contribution to the input.
    dwf = np.dot(dhf, X.T)
    dbf = dhf
    dXf = np.dot(wf.T, dhf)

    dwu = np.dot(dhu, X.T)
    dbu = dhu
    # Fix: propagate the gate *gradient* dhu; the original used the forward
    # activation hu here, which corrupted dX and therefore a_next.
    dXu = np.dot(wu.T, dhu)

    dwo = np.dot(dho, X.T)
    dbo = dho
    dXo = np.dot(wo.T, dho)

    dwc = np.dot(dc_temp, X.T)
    dbc = dc_temp
    dXc = np.dot(wc.T, dc_temp)

    dX = dXo + dXc + dXu + dXf

    # The first hidden_size rows of dX are passed back as the hidden-state
    # gradient (presumably X stacks the previous hidden state on top of the
    # step input -- verify against the forward pass).
    prev_a_grad = dX[:hidden_size, :]
    # c = hf * c_old + ..., so the previous cell state receives hf * dc.
    prev_c_grad = hf * dc

    grad = [dwy, dby, dwf, dbf, dwu, dbu, dwo, dbo, dwc, dbc]
    return (prev_a_grad, prev_c_grad), grad
注意:dhf 是遗忘门的梯度,dhu 是更新门的梯度,dho 是输出门的梯度。