Batch normalization seems to behave differently in Keras and PyTorch

Date: 2019-04-16 09:31:27

Tags: tensorflow keras pytorch batch-normalization

I have a simple model and am experimenting with how batch normalization behaves after a linear layer. It does not seem to normalize at all; in Keras it apparently acts as the identity by default. After checking with the same weights in PyTorch, its batch normalization does change the output. See below. Why is that, and what is wrong with the model?

Edit: a self-contained example that prints the results from both the Keras and the PyTorch model for visual comparison. To enable the batch normalization layers, uncomment the lines marked for it and compare the results again.

import tensorflow as tf
import numpy as np
from collections import OrderedDict

from tensorflow.python.keras import layers 
from tensorflow.python.keras import models

import torch
from torch import nn
from torch.nn import functional as F


from tensorflow.contrib import eager as tfe
tfe.enable_eager_execution()


class PytorchModel(nn.Module):
    def __init__(self,
                 in_channels,
                 out_channels):
        super().__init__()

        self.linear = nn.Linear(in_channels, out_channels, bias=True)
        self.norm = nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01)

    def forward(self, inputs):
        x = self.linear(inputs)
        ## uncomment for batch normalization (BatchNorm1d normalizes over
        ## dim 1, so the feature axis is swapped into position 1 and back)
        # x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2, 1).contiguous()
        x = F.relu(x)
        return x

class KerasModel(models.Model):
    def __init__(self,
                 num_filters):
        super(KerasModel, self).__init__()

        my_layers = []
        BN = layers.BatchNormalization(name='my_bn', momentum=0.01, epsilon=1e-3)
        LIN = layers.Dense(num_filters, name='my_linear', activation=None, use_bias=True)
        my_layers.append([LIN, BN])
        self.my_layers = my_layers

    def call(self, ins):
        x = self.my_layers[0][0](tf.convert_to_tensor(ins))
        ## uncomment for batch normalization
        # x = self.my_layers[0][1](x)
        x = tf.nn.relu(x)
        return x

if __name__ == '__main__':

    # create dummy input
    np.random.seed(0)
    input_np = np.random.rand(4,5,6)
    filters = 8

    keras_l = KerasModel(num_filters=filters)

    tf_features = keras_l(tf.convert_to_tensor(input_np))

    pytorch_l = PytorchModel(in_channels=6,
                             out_channels=filters)

    # copy weights from keras model to pytorch model
    new_state_dict = OrderedDict()
    new_state_dict['linear.weight'] = torch.from_numpy(np.transpose(keras_l.layers[0].weights[0].numpy(), (1, 0)))
    new_state_dict['linear.bias'] = torch.from_numpy(keras_l.layers[0].bias.numpy())
    ## uncomment for batch normalization
    # new_state_dict['norm.weight'] = torch.from_numpy(keras_l.layers[1].weights[0].numpy())  # gamma (scale)
    # new_state_dict['norm.bias'] = torch.from_numpy(keras_l.layers[1].weights[1].numpy())  # beta (shift)
    # new_state_dict['norm.running_mean'] = torch.from_numpy(keras_l.layers[1].weights[2].numpy())
    # new_state_dict['norm.running_var'] = torch.from_numpy(keras_l.layers[1].weights[3].numpy())
    pytorch_l.load_state_dict(new_state_dict, strict=False)

    batch_input_voxels_np = torch.from_numpy(input_np).float()
    batch_pytorch_features = pytorch_l(batch_input_voxels_np)

    # => check how the results differ when batch normalization is applied
    print(tf_features[0, 0, :])
    print(batch_pytorch_features[0, 0, :])

2 Answers:

Answer 0 (score: 0):

Don't forget to put the PyTorch model in eval mode and disable gradient tracking:

pytorch_l.eval()
with torch.no_grad():
    ...
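
Why this matters: PyTorch's BatchNorm1d runs in training mode by default and normalizes with the statistics of the current batch, while the Keras layer above, called without a training flag, runs in inference mode and uses its moving mean (initialized to 0) and moving variance (initialized to 1), so at initialization it is almost exactly the identity. A minimal, self-contained sketch (assuming a freshly constructed layer) that makes the two modes visible:

import numpy as np
import tensorflow as tf
from tensorflow.python.keras import layers

from tensorflow.contrib import eager as tfe
tfe.enable_eager_execution()

bn = layers.BatchNormalization(momentum=0.01, epsilon=1e-3)
x = tf.convert_to_tensor(np.random.rand(4, 8), dtype=tf.float32)

# Inference mode: y = gamma * (x - moving_mean) / sqrt(moving_var + eps) + beta.
# With freshly initialized statistics this is ~x, i.e. effectively an identity.
print(np.abs((bn(x, training=False) - x).numpy()).max())  # ~0

# Training mode: the current batch statistics are used, so the output changes.
print(np.abs((bn(x, training=True) - x).numpy()).max())   # clearly non-zero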

Answer 1 (score: 0):

Try this:

with torch.no_grad():
    pytorch_l.eval()
    batch_pytorch_features = pytorch_l(batch_input_voxels_np)
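
The alignment can also go the other way: keep pytorch_l in its default train() mode and force the Keras batch-norm layer to use batch statistics as well, via the standard training argument of tf.keras layers. A sketch of that variant (KerasModelBN is a hypothetical name, not from the original post):

class KerasModelBN(KerasModel):
    # Hypothetical variant of the question's KerasModel that enables the
    # batch-norm layer and forwards Keras' standard training flag to it.
    def call(self, ins, training=None):
        x = self.my_layers[0][0](tf.convert_to_tensor(ins))
        x = self.my_layers[0][1](x, training=training)  # BN honours the flag
        return tf.nn.relu(x)

# Usage: batch statistics on both sides of the comparison.
# keras_l = KerasModelBN(num_filters=filters)
# tf_features = keras_l(tf.convert_to_tensor(input_np), training=True)
# pytorch_l.train()  # PyTorch's default mode; also uses batch statistics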