I have written a softmax backward propagation function (softmax_backward_propagate) in Python. It works as I expect, but I've been told I should be able to implement it without a loop. I've been trying to figure it out, but I'm stuck. Can anyone explain the best way to accomplish the same thing without a loop?

http://saitcelebi.com/tut/output/part2.html <- a good explanation of the softmax function and its derivative

Thanks for your time.
import numpy as np

def softmax_backward_propagate(dJ_dyhat, cache):
    """ Compute the backward propagation through the softmax activations,
        i.e. compute dJ_dz

    Inputs:
        dJ_dyhat: Upstream gradients on the outputs of the softmax
                  activations. Shape (n_h, m) where n_h is number of units
                  in the linear portion of the layer, and m is number of
                  samples
        cache: cached values saved during forward propagate

    Returns:
        dJ_dz: Gradients on the inputs to this layer. Shape (n_h, m)
    """
    for i in range(cache.shape[1]):
        # Softmax Jacobian for sample i: matrix[a, b] = y[a] * (delta_ab - y[b])
        y = np.copy(cache[:, i]).reshape(-1, 1)
        matrix = np.matmul(y, np.ones(y.shape).T) * \
                 (np.identity(y.size) - np.matmul(np.ones(y.shape), y.T))
        # Jacobian-vector product with the upstream gradient for sample i
        if i == 0:
            m2 = np.matmul(matrix, dJ_dyhat[:, i])
        else:
            m2 = np.column_stack((m2, np.matmul(matrix, dJ_dyhat[:, i])))
    dJ_dz = m2.reshape(cache.shape)
    return dJ_dz
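My reading of the tutorial linked above is that the per-sample matrix built inside the loop is the softmax Jacobian, J[a, b] = yhat[a] * (delta_ab - yhat[b]). If that is right, I would expect the whole loop to collapse into elementwise operations, roughly as in the sketch below, but I haven't managed to convince myself it really matches the loop version (the function name with _vectorized is just mine):

# Loop-free sketch of what I *think* the same computation should be,
# assuming the per-sample matrix above is the softmax Jacobian
# J[a, b] = yhat[a] * (delta_ab - yhat[b]). Not verified against the loop.
def softmax_backward_propagate_vectorized(dJ_dyhat, cache):
    yhat = cache                                    # shape (n_h, m)
    # For each sample k: dot[k] = sum_a yhat[a, k] * dJ_dyhat[a, k]
    dot = np.sum(yhat * dJ_dyhat, axis=0, keepdims=True)   # shape (1, m)
    # dJ_dz[j, k] = yhat[j, k] * (dJ_dyhat[j, k] - dot[k])
    dJ_dz = yhat * (dJ_dyhat - dot)
    return dJ_dz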
def softmax_forward_propagate(z):
    """ Compute the forward propagation through the softmax activations,
        i.e. application of softmax on z

    Inputs:
        z: Inputs to this layer (i.e. outputs of the previous linear
           layer). Shape (n_h, m) where n_h is number of units in the
           previous linear layer, and m is number of samples.

    Returns:
        yhat: softmax applied to z. Shape (n_h, m)
        cache: Objects to be sent to backpropagation for this layer.
    """
    # Exponentiate, then normalize each column (sample) so it sums to 1
    ex = np.exp(z)
    exsum = np.sum(ex, axis=0)
    yhat = ex / exsum
    cache = yhat
    return yhat, cache
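For completeness, this is roughly how I exercise the two functions (toy shapes and random data, just to check that everything stays (n_h, m)):

# Quick shape check with made-up data: n_h = 4 units, m = 3 samples.
np.random.seed(0)
z = np.random.randn(4, 3)             # inputs to the softmax layer, shape (n_h, m)
yhat, cache = softmax_forward_propagate(z)
dJ_dyhat = np.random.randn(4, 3)      # pretend upstream gradient, shape (n_h, m)
dJ_dz = softmax_backward_propagate(dJ_dyhat, cache)
print(yhat.shape, dJ_dz.shape)        # both should be (4, 3)
print(np.allclose(yhat.sum(axis=0), 1.0))   # each column of yhat sums to 1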