I wrote a custom layer for the Keras Sequential API, but it does not generalize when I test it on MNIST.
from keras.layers import Layer, BatchNormalization, Activation, Add, Conv2D


class ResidualLayer(Layer):
    def __init__(self, conv_layer1, conv_layer2, **kwargs):
        self.__conv_layer1 = conv_layer1
        self.__conv_layer2 = conv_layer2
        super(ResidualLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.__in_shape = input_shape
        self.bn1 = BatchNormalization()
        self.bn2 = BatchNormalization()
        self.rel1 = Activation(activation='relu')
        self.rel2 = Activation(activation='relu')
        self.add = Add()
        # 1x1 convolution used to rescale the shortcut branch when shapes differ
        self.scale_conv = Conv2D(filters=self.__conv_layer1.filters,
                                 strides=self.__conv_layer1.strides,
                                 kernel_size=(1, 1),
                                 padding=self.__conv_layer1.padding)
        super(ResidualLayer, self).build(input_shape)

    def call(self, x):
        input_layer = x
        tmp_layer = self.__conv_layer1(input_layer)
        tmp_layer = self.bn1(tmp_layer)
        tmp_layer = self.rel1(tmp_layer)
        tmp_layer = self.__conv_layer2(tmp_layer)
        tmp_layer = self.bn2(tmp_layer)
        # add the shortcut, rescaling it with the 1x1 convolution if needed
        if self.__conv_layer1.output_shape == self.__in_shape:
            tmp_layer = self.add([tmp_layer, input_layer])
        else:
            scaled_layer = self.scale_conv(input_layer)
            tmp_layer = self.add([tmp_layer, scaled_layer])
        tmp_layer = self.rel2(tmp_layer)
        return tmp_layer

    def compute_output_shape(self, input_shape):
        return self.__conv_layer2.output_shape
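For context, I use the layer in a Sequential model built roughly like this (a simplified sketch; the filter counts, depth and loss here are placeholders, not my exact MNIST architecture):

from keras.models import Sequential
from keras.layers import Conv2D, Dense, Flatten

model = Sequential()
model.add(Conv2D(16, (3, 3), padding='same', activation='relu', input_shape=(28, 28, 1)))
# the two conv layers of the residual block are passed to the custom layer
model.add(ResidualLayer(Conv2D(16, (3, 3), padding='same'),
                        Conv2D(16, (3, 3), padding='same')))
model.add(Flatten())
model.add(Dense(10, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])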
The model architecture is not very complicated, which made me suspicious.
I reimplemented the model with the Functional API and a plain function:
def reslayer(x, conv_layer1, conv_layer2):
    input_layer = x
    tmp_layer = conv_layer1(input_layer)
    tmp_layer = BatchNormalization()(tmp_layer)
    tmp_layer = Activation(activation='relu')(tmp_layer)
    tmp_layer = conv_layer2(tmp_layer)
    tmp_layer = BatchNormalization()(tmp_layer)
    if conv_layer1.output_shape == keras.backend.shape(x):
        tmp_layer = Add()([tmp_layer, input_layer])
    else:
        scale_conv = Conv2D(filters=conv_layer1.filters,
                            strides=conv_layer1.strides,
                            kernel_size=(1, 1),
                            padding=conv_layer1.padding)
        scaled_layer = scale_conv(input_layer)
        tmp_layer = Add()([tmp_layer, scaled_layer])
    tmp_layer = Activation(activation='relu')(tmp_layer)
    return tmp_layer
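The Functional API model is assembled from the same building blocks, roughly like this (again a simplified sketch with placeholder filter counts and loss):

import keras
from keras.models import Model
from keras.layers import Input, Conv2D, Dense, Flatten

inputs = Input(shape=(28, 28, 1))
x = Conv2D(16, (3, 3), padding='same', activation='relu')(inputs)
# the residual block is now just a function applied to the tensor
x = reslayer(x, Conv2D(16, (3, 3), padding='same'),
             Conv2D(16, (3, 3), padding='same'))
x = Flatten()(x)
outputs = Dense(10, activation='softmax')(x)
model = Model(inputs, outputs)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])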
This works as expected and generalizes well (~95% accuracy instead of ~20%)!
But I cannot understand the difference between the two solutions; I would have expected them to produce the same results.
I think I am misunderstanding something about custom layers. Can someone explain the mistake and how to implement a custom layer correctly?