我需要(适当地)实现软最大和梯度损失以及一个可选的辍学(最好是反向),我该怎么做呢?这是我的代码。
def train(self, features, targets, optimizer, decay_rate_1 = None,
decay_rate_2 = None, epsilon = None):
# Feed Forward
l0 = features
l1 = self.activation_function(np.dot(l0, self.w0)+ self.bh)
l2 = softmax(np.dot(l1, self.w1)+ self.bo)
# Backpropagation
l2_error = l2 - targets
l2_delta = l2_error * self.activation_function(l2, deriv=True)
l1_error = l2_delta.dot(self.w1.T)
l1_delta = l1_error * self.activation_function(l1, deriv=True)
if optimizer == 'sgd':
# Update Weights
self.w1 -= self.lr * l1.T.dot(l2_delta)
self.w0 -= self.lr * l0.T.dot(l1_delta)