Question

我正在根据本文https://arxiv.org/pdf/1612.04530.pdf以及Josef Ondrej找到here之前的工作，为Keras开发一个Permutational Equivariant Layer。

图层本身是由多个层组成的Keras模型：

from keras import backend as K
from keras import losses
from keras.layers import Average, Add, Concatenate, Maximum, Input, Dense, Lambda
from keras.models import Model
from keras.engine.topology import Layer

def PermutationEquivariant(input_shape, layer_size, tuple_dim = 2, reduce_fun = "sum", dense_params = {}):
    """
    Implements a permutation equivariant layer.
    Each batch in our data consists of `input_shape[0]` observations
    each with `input_shape[1]` features.
    Args:
    input_shape -- A pair of `int` - (number of observations in one batch x
        number of features of each observation). The batch dimension is not included.
    layer_size -- `int`. Size of dense layer applied to each tuple of observations.
    tuple_dim -- A `int`, how many observations to put in one tuple.
    reduce_fun -- A `string`, type of function to "average" over all tuples starting with the same index.
    Returns:
    g -- A keras Model - the permutation equivariant layer.
        It consists of one tuple layer that creates all possible `tuple_dim`-tuples
        of observations, sorted on an axis along which the first index is constant. 
        The same dense layer is applied on every tuple and then some symmetric pooling function is applied 
        across all tuples with the same first index (for example mean or maximum).
    """

    inputs = Input(shape=input_shape)## input_shape: batch_size x row x col



    ## SeperatedTuple layer 
    x = SeperatedTuples(tuple_dim, input_shape = input_shape)(inputs)## out_shape: batch_size x row x row ** (tuple_dim-1) x tuple_dim*col

    ## Dense layer -- implemented with a conv layer
    # Use the same dense layer for each tuple
    dense_input_shape = (tuple_dim*input_shape[1], ) # batch_size x tuple_dim*col
    dense_layer = Dense(input_shape = dense_input_shape, units=layer_size, **dense_params)

    # iterate through rows
    x_i_list = []
    for i in range(input_shape[0]):
        xi_j_list = []
        # applying the dense layer to each tuple where first index equals i   
        # here we could also use a 1x1 convolution. Instead of reusing
        # the dense layer for each tuple, we would be reusing the kernels     
        for j in range(input_shape[0] ** (tuple_dim-1)):
            input_ij = Lambda(lambda x : x[:,i,j,:], output_shape=(tuple_dim*input_shape[-1],))(x) ##out_shape: batch_size x tuple_dim * col
            xi_j_list += [dense_layer(input_ij)] ## xi_j_list-shape: row x batch_size x layer_size

        ## Pooling layer
        # Pooling the list of the dense outputs of all the tuples where first index equals i to out_shape: batch_size x layer_size 
        # note that axis=0 because in previous step row-axis comes before batch_size-axis
        # Use Lambda Wrapper to preserve the output being a Keras Tensor
        if reduce_fun == "mean":
            pooling_layer = Average(axis=1)
            #pooling_layer = Lambda(lambda x : K.mean(x, axis = 0))
        elif reduce_fun == "max":
            pooling_layer = Maximum()
            #pooling_layer = Lambda(lambda x : K.max(x, axis = 0))
        elif reduce_fun == "sum":
            pooling_layer = Add()
            #pooling_layer = Lambda(lambda x : K.sum(x, axis = 0))
        else:
            raise ValueError("Invalid value for argument `reduce_fun` provided. ")

        xi = pooling_layer(xi_j_list) ## xi-shape: batch_size x layer_size
        x_i_list += [xi]

    # x_i_list-shape: 
    # Concatenate the results of each row
    x = Lambda(lambda x : K.stack(x, axis=1), output_shape = (input_shape[0], layer_size))(x_i_list) ## out_shape: batch_size x row x layer_size

    model = Model(inputs=inputs, outputs=x)
    return model

class SeperatedTuples(Layer):
    """
    Creates all possible tuples of rows of 2D tensor, with an additional axis 
    along which the first elements are constant.
    In the case of tuple_dim = 2, from one input batch:
        x_1,
        x_2,
        ...
        x_n,
    where x_i are rows of the tensor, it creates 3D output tensor:
        [[x_1 | x_1, x_1 | x_2 ... x_1 | x_n],
         [x_2 | x_1, x_2 | x_2 ... x_2 | x_n],
         ...
                               ... x_n | x_n]]
    Args:
    tuple_dim -- A `int`. Dimension of one tuple (i.e. how many rows from the input
    tensor to combine to create a row in output tensor)
    input_shape -- A `tuple` of `int`. In the most frequent case where our data
        has shape (batch_size x num_rows x num_cols) this should be (num_rows x num_cols).
    """

    def __init__(self, tuple_dim = 2, **kwargs):
        self.tuple_dim = tuple_dim
        super(SeperatedTuples, self).__init__(**kwargs)

    def create_indices(self, n, k = 2):
        """
        Creates all integer valued coordinate k-tuples in k dimensional hypercube with edge size n.
        for example n = 4, k = 2
        returns [[0, 0], [0, 1], [0, 2], [0, 3],
                 [1, 0], [1, 1], [1, 2], [1, 3],
                 ...
                 [3, 0], [3, 1], [3, 2], [3, 3]]
        Args:
        n -- A `int`, edge size of the hypercube.
        k -- A `int`, dimension of the hypercube.
        Returns:
        indices_n_k -- A `list` of `list` of `int`. Each inner list represents coordinates of one integer point
            in the hypercube.
        """
        if k == 0:
            indices_n_k = [[]]
        else:
            indices_n_k_minus_1 = self.create_indices(n, k-1)
            indices_n_k = [[i] + indices_n_k_minus_1[c] for i in range(n) for c in range(n**(k-1))]

        return indices_n_k

    def create_seperated_indices(self, n, k = 2):
        """
        Same as create_indices, just that there is an additional axis along which the first value of the tuples is constant
        for example n = 4, k = 2
        returns [[[0, 0], [0, 1], [0, 2], [0, 3]],
                 [[1, 0], [1, 1], [1, 2], [1, 3]],
                 ...
                 [[3, 0], [3, 1], [3, 2], [3, 3]]]

        shape: row x row x k
        """

        indices = self.create_indices(n,k)
        seperated_indices = [indices[i:i + n] for i in range(0, len(indices), n)]
        return seperated_indices

    def build(self, input_shape):
        # Create indexing tuple
        self.gathering_indices = self.create_seperated_indices(input_shape[-2], self.tuple_dim)
        super(SeperatedTuples, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, x):
        """
        input_dim : batch_size x rows x cols
        output_dim : batch_size x rows x rows ** (tuple_dim-1) x cols * tuple_dim
        """


        stacks_of_tuples = K.map_fn(
            fn = lambda z : ## z shape: row x col
                K.stack(
                    [K.concatenate(
                        [K.reshape(
                            K.gather(z, i), ## shape: tuple_dim x col
                            shape = (1,-1)
                         ) ## shape: 1 x tuple_dim*col
                         for i in indices # i-dim: tuple_dim, indices-shape: row x tuple_dim
                        ], ## shape: row x 1 x tuple_dim*col
                        axis = 0
                    ) ## shape: row x tuple_dim*col
                for indices in self.gathering_indices # gathering_indices-shape: row x row x tuple_dim
                ],
                axis=0), ## shape: row x row x tuple_dim*col
            elems = x ## shape: batch_size x row x col
        ) ## shape: batch_size x row x row x tuple_dim*col
        return stacks_of_tuples

    def compute_output_shape(self, input_shape):
        """
        input_shape: batch_size x rows x cols
        output_shape: batch_size x rows x rows ** (tuple_dim-1) x cols * tuple_dim
        """

        output_shape = list(input_shape)
        output_shape[-1] = output_shape[-1] * self.tuple_dim
        output_shape[-2] = output_shape[-2] ** self.tuple_dim
        return tuple(output_shape)

单独测试PermutationEquivariant层时，一切似乎都正常（运行1）。但是，当我尝试将其合并到一个更大的模型中时，输出只会重复（运行2）。

from keras.models import Model
from keras.layers import Input, Lambda
import numpy as np

# parameters for Permutational Equivariant layer
input_shape = (2,5)
dense_params = {'kernel_initializer': 'glorot_normal', 'bias_initializer': 'glorot_normal', 'activation': 'tanh'}


sample = np.random.random((1,) + input_shape)


# run 1: Using only the PermutationEquivariant layer as a model by itself seems to work
model_1 =  PermutationEquivariant(input_shape=input_shape, layer_size=10, tuple_dim=2, reduce_fun="sum", dense_params = dense_params)
model_1.compile(optimizer='sgd', loss='categorical_crossentropy')
print("model_1: \n", model_1.predict(sample))
#model_1:
#[[[-1.0494264  -1.6808903   1.2861781  -0.90004706  1.6178854
#    1.6686234  -1.5724193   1.2454509   0.3730019  -1.4580158 ]
#  [-1.3904197  -1.467866    1.0848606  -1.2094728   1.6304723
#    1.6369174  -1.4074551   0.58116794  0.292305   -1.7162979 ]]]

# run 2: Incorporating the PermutationEquivariant layer inside another model makes the output constant along the first axis
inputs = Input(shape=input_shape)
x = PermutationEquivariant(input_shape=input_shape, layer_size=10, tuple_dim=2, reduce_fun="sum", dense_params = dense_params)(inputs)
model_2 = Model(inputs=inputs,outputs = x)
model_2.compile(optimizer='sgd', loss='categorical_crossentropy')
print("model_2: \n", model_2.predict(sample))
enter code here
#model_2:
# [[[ 0.72823656  1.2213255  -0.28404936  1.4711846  -0.49544945
#    1.7930243  -0.7502286   1.892496   -1.675402   -0.2252224 ]
#   [ 0.72823656  1.2213255  -0.28404936  1.4711846  -0.49544945
#    1.7930243  -0.7502286   1.892496   -1.675402   -0.2252224 ]]]

我尝试过theano和tensorflow作为后端，两者都有相同的结果。有没有人知道为什么它在另一个模型里面时表现不同/我错过了什么？我感谢任何帮助！

内部keras模型

0 个答案: