我编写以下图层来模拟喀拉拉邦的辍学层:
class ShakeoutDense(Dense):
def __init__(self, p, **kwargs):
self.p = p
super(ShakeoutDense, self).__init__(**kwargs)
def generate_bernoulli_matrix(self, shape):
r_matrix = tf.random.uniform(shape=shape, maxval=1)
b = tf.math.greater(self.p, r_matrix)
f = tf.cast(b, dtype=tf.float32)
f = Lambda(lambda x: K.stop_gradient(x))(f)
return f
def build(self, input_shape):
super(ShakeoutDense, self).build(input_shape)
def call(self, inputs, **kwargs):
input_dim = inputs.get_shape().as_list()[-1]
kernel_shape = [input_dim, self.units]
mask = self.generate_bernoulli_matrix(kernel_shape)
weight = self.kernel * mask
output = K.dot(inputs, weight)
if self.use_bias:
output = K.bias_add(output, self.bias, data_format='channels_last')
if self.activation is not None:
output = self.activation(output)
return output
我收到以下错误:
ValueError: An operation has `None` for gradient. Please make sure that all of your ops have a gradient defined (i.e. are differentiable). Common ops without gradient: K.argmax, K.round, K.eval.
我放了Lambda(lambda x: K.stop_gradient(x))
来防止传播,但这是行不通的!我该怎么办? :-\