How to implement a custom GRU in Keras

Date: 2018-02-16 22:11:04

Tags: python-3.x keras

I am trying to implement a custom GRU layer in keras 2.1.2-py36_0, and I want to use the following gate equations:

z_t = act(W_z · h_{t-1} + x_t)
r_t = act(W_r · h_{t-1} + x_t)
h_t = act(W_h · (r_t * h_{t-1}) + x_t)

instead of the gates currently implemented in Keras, which are:

z_t = act(W_z · h_{t-1} + U_z · x_t)
r_t = act(W_r · h_{t-1} + U_r · x_t)
h_t = act(W_h · (r_t * h_{t-1}) + U_h · x_t)
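
To make the difference concrete, here is a minimal sketch of a single recurrent step under both formulations, written with Keras backend ops only (this is just an illustration, not the cell implementation below; the kernels and the tensors x_t and h_tm1 are assumed to be given, and in the custom variant x_t must already have `units` dimensions because the input kernels U_* are dropped):

from keras import backend as K

def standard_gru_step(x_t, h_tm1, W_z, W_r, W_h, U_z, U_r, U_h,
                      act=K.tanh, rec_act=K.hard_sigmoid):
    # Standard Keras GRU: separate input kernels U_* project x_t.
    z = rec_act(K.dot(h_tm1, W_z) + K.dot(x_t, U_z))
    r = rec_act(K.dot(h_tm1, W_r) + K.dot(x_t, U_r))
    hh = act(K.dot(r * h_tm1, W_h) + K.dot(x_t, U_h))
    return z * h_tm1 + (1 - z) * hh

def custom_gru_step(x_t, h_tm1, W_z, W_r, W_h,
                    act=K.tanh, rec_act=K.hard_sigmoid):
    # Custom variant: x_t is added directly, so it must have shape (..., units).
    z = rec_act(K.dot(h_tm1, W_z) + x_t)
    r = rec_act(K.dot(h_tm1, W_r) + x_t)
    hh = act(K.dot(r * h_tm1, W_h) + x_t)
    return z * h_tm1 + (1 - z) * hh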

Custom GRU cell for my data:

import warnings

from keras import backend as K
from keras import activations, initializers, regularizers, constraints
from keras.engine.topology import Layer
# RNN and the private dropout-mask helpers live in keras.layers.recurrent in 2.1.2
from keras.layers.recurrent import RNN, _generate_dropout_mask, _generate_dropout_ones
from keras.legacy import interfaces


class CGRUCell(Layer):

def __init__(self, units,
             activation='tanh',
             recurrent_activation='hard_sigmoid',
             use_bias=True,
             kernel_initializer='glorot_uniform',
             recurrent_initializer='orthogonal',
             bias_initializer='zeros',
             kernel_regularizer=None,
             recurrent_regularizer=None,
             bias_regularizer=None,
             kernel_constraint=None,
             recurrent_constraint=None,
             bias_constraint=None,
             dropout=0.,
             recurrent_dropout=0.,
             implementation=1,
             **kwargs):
    super(CGRUCell, self).__init__(**kwargs)
    self.units = units
    self.activation = activations.get(activation)
    self.recurrent_activation = activations.get(recurrent_activation)
    self.use_bias = use_bias

    self.kernel_initializer = initializers.get(kernel_initializer)
    self.recurrent_initializer = initializers.get(recurrent_initializer)
    self.bias_initializer = initializers.get(bias_initializer)

    self.kernel_regularizer = regularizers.get(kernel_regularizer)
    self.recurrent_regularizer = regularizers.get(recurrent_regularizer)
    self.bias_regularizer = regularizers.get(bias_regularizer)

    self.kernel_constraint = constraints.get(kernel_constraint)
    self.recurrent_constraint = constraints.get(recurrent_constraint)
    self.bias_constraint = constraints.get(bias_constraint)

    self.dropout = min(1., max(0., dropout))
    self.recurrent_dropout = min(1., max(0., recurrent_dropout))
    self.implementation = implementation
    self.state_size = self.units
    self._dropout_mask = None
    self._recurrent_dropout_mask = None

def build(self, input_shape):
    input_dim = input_shape[-1]
    #self.kernel = self.add_weight(shape=(input_dim, self.units * 3),
    #                              name='kernel',
    #                              initializer=self.kernel_initializer,
    #                              regularizer=self.kernel_regularizer,
    #                              constraint=self.kernel_constraint)
    self.recurrent_kernel = self.add_weight(
        shape=(self.units, self.units * 3),
        name='recurrent_kernel',
        initializer=self.recurrent_initializer,
        regularizer=self.recurrent_regularizer,
        constraint=self.recurrent_constraint)

    if self.use_bias:
        self.bias = self.add_weight(shape=(self.units * 3,),
                                    name='bias',
                                    initializer=self.bias_initializer,
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
    else:
        self.bias = None

    #self.kernel_z = self.kernel[:, :self.units]
    self.recurrent_kernel_z = self.recurrent_kernel[:, :self.units]
    #self.kernel_r = self.kernel[:, self.units: self.units * 2]
    self.recurrent_kernel_r = self.recurrent_kernel[:,
                                                    self.units:
                                                    self.units * 2]
    #self.kernel_h = self.kernel[:, self.units * 2:]
    self.recurrent_kernel_h = self.recurrent_kernel[:, self.units * 2:]

    if self.use_bias:
        self.bias_z = self.bias[:self.units]
        self.bias_r = self.bias[self.units: self.units * 2]
        self.bias_h = self.bias[self.units * 2:]
    else:
        self.bias_z = None
        self.bias_r = None
        self.bias_h = None
    self.built = True

def call(self, inputs, states, training=None):
    h_tm1 = states[0]  # previous memory

    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, K.shape(inputs)[-1]),
            self.dropout,
            training=training,
            count=3)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, self.units),
            self.recurrent_dropout,
            training=training,
            count=3)

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    if self.implementation == 1:
        if 0. < self.dropout < 1.:
            inputs_z = inputs * dp_mask[0]
            inputs_r = inputs * dp_mask[1]
            inputs_h = inputs * dp_mask[2]
        else:
            inputs_z = inputs
            inputs_r = inputs
            inputs_h = inputs
        print(inputs)    
        # Custom implementation of inputs which are already embedding parameters
        #x_z = K.dot(inputs_z, self.kernel_z)
        #x_r = K.dot(inputs_r, self.kernel_r)
        #x_h = K.dot(inputs_h, self.kernel_h)
        #if self.use_bias:
        #    x_z = K.bias_add(x_z, self.bias_z)
        #    x_r = K.bias_add(x_r, self.bias_r)
        #    x_h = K.bias_add(x_h, self.bias_h)
        x_z = inputs_z
        x_r = inputs_r
        x_h = inputs_h

        if 0. < self.recurrent_dropout < 1.:
            h_tm1_z = h_tm1 * rec_dp_mask[0]
            h_tm1_r = h_tm1 * rec_dp_mask[1]
            h_tm1_h = h_tm1 * rec_dp_mask[2]
        else:
            h_tm1_z = h_tm1
            h_tm1_r = h_tm1
            h_tm1_h = h_tm1
        z = self.recurrent_activation(x_z + K.dot(h_tm1_z,
                                                  self.recurrent_kernel_z))
        r = self.recurrent_activation(x_r + K.dot(h_tm1_r,
                                                  self.recurrent_kernel_r))

        hh = self.activation(x_h + K.dot(r * h_tm1_h,
                                         self.recurrent_kernel_h))
    else:
        if 0. < self.dropout < 1.:
            inputs *= dp_mask[0]

        # Custom implementation of inputs which are already embedding parameters
        #matrix_x = K.dot(inputs, self.kernel)
        #if self.use_bias:
        #    matrix_x = K.bias_add(matrix_x, self.bias)
        matrix_x = inputs

        if 0. < self.recurrent_dropout < 1.:
            h_tm1 *= rec_dp_mask[0]
        matrix_inner = K.dot(h_tm1,
                             self.recurrent_kernel[:, :2 * self.units])

        x_z = matrix_x[:, :self.units]
        x_r = matrix_x[:, self.units: 2 * self.units]
        recurrent_z = matrix_inner[:, :self.units]
        recurrent_r = matrix_inner[:, self.units: 2 * self.units]

        z = self.recurrent_activation(x_z + recurrent_z)
        r = self.recurrent_activation(x_r + recurrent_r)

        x_h = matrix_x[:, 2 * self.units:]
        recurrent_h = K.dot(r * h_tm1,
                            self.recurrent_kernel[:, 2 * self.units:])
        hh = self.activation(x_h + recurrent_h)
    h = z * h_tm1 + (1 - z) * hh
    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True
    return h, [h]

def get_config(self):
    config = {'units': self.units,
              'activation': activations.serialize(self.activation),
              'recurrent_activation': activations.serialize(self.recurrent_activation),
              'use_bias': self.use_bias,
              'kernel_initializer': initializers.serialize(self.kernel_initializer),
              'recurrent_initializer': initializers.serialize(self.recurrent_initializer),
              'bias_initializer': initializers.serialize(self.bias_initializer),
              'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
              'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer),
              'bias_regularizer': regularizers.serialize(self.bias_regularizer),
              'kernel_constraint': constraints.serialize(self.kernel_constraint),
              'recurrent_constraint': constraints.serialize(self.recurrent_constraint),
              'bias_constraint': constraints.serialize(self.bias_constraint),
              'dropout': self.dropout,
              'recurrent_dropout': self.recurrent_dropout,
              'implementation': self.implementation}
    base_config = super(CGRUCell, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

class CGRU(RNN):

@interfaces.legacy_recurrent_support
def __init__(self, units,
             activation='tanh',
             recurrent_activation='hard_sigmoid',
             use_bias=True,
             kernel_initializer='glorot_uniform',
             recurrent_initializer='orthogonal',
             bias_initializer='zeros',
             kernel_regularizer=None,
             recurrent_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             kernel_constraint=None,
             recurrent_constraint=None,
             bias_constraint=None,
             dropout=0.,
             recurrent_dropout=0.,
             implementation=1,
             return_sequences=False,
             return_state=False,
             go_backwards=False,
             stateful=False,
             unroll=False,
             **kwargs):
    if implementation == 0:
        warnings.warn('`implementation=0` has been deprecated, '
                      'and now defaults to `implementation=1`.'
                      'Please update your layer call.')

    cell = CGRUCell(units,
                   activation=activation,
                   recurrent_activation=recurrent_activation,
                   use_bias=use_bias,
                   kernel_initializer=kernel_initializer,
                   recurrent_initializer=recurrent_initializer,
                   bias_initializer=bias_initializer,
                   kernel_regularizer=kernel_regularizer,
                   recurrent_regularizer=recurrent_regularizer,
                   bias_regularizer=bias_regularizer,
                   kernel_constraint=kernel_constraint,
                   recurrent_constraint=recurrent_constraint,
                   bias_constraint=bias_constraint,
                   dropout=dropout,
                   recurrent_dropout=recurrent_dropout,
                   implementation=implementation)
    super(CGRU, self).__init__(cell,
                              return_sequences=return_sequences,
                              return_state=return_state,
                              go_backwards=go_backwards,
                              stateful=stateful,
                              unroll=unroll,
                              **kwargs)
    self.activity_regularizer = regularizers.get(activity_regularizer)

def call(self, inputs, mask=None, training=None, initial_state=None):
    self.cell._dropout_mask = None
    self.cell._recurrent_dropout_mask = None
    return super(CGRU, self).call(inputs,
                                 mask=mask,
                                 training=training,
                                 initial_state=initial_state)

@property
def units(self):
    return self.cell.units

@property
def activation(self):
    return self.cell.activation

@property
def recurrent_activation(self):
    return self.cell.recurrent_activation

@property
def use_bias(self):
    return self.cell.use_bias

@property
def kernel_initializer(self):
    return self.cell.kernel_initializer

@property
def recurrent_initializer(self):
    return self.cell.recurrent_initializer

@property
def bias_initializer(self):
    return self.cell.bias_initializer

@property
def kernel_regularizer(self):
    return self.cell.kernel_regularizer

@property
def recurrent_regularizer(self):
    return self.cell.recurrent_regularizer

@property
def bias_regularizer(self):
    return self.cell.bias_regularizer

@property
def kernel_constraint(self):
    return self.cell.kernel_constraint

@property
def recurrent_constraint(self):
    return self.cell.recurrent_constraint

@property
def bias_constraint(self):
    return self.cell.bias_constraint

@property
def dropout(self):
    return self.cell.dropout

@property
def recurrent_dropout(self):
    return self.cell.recurrent_dropout

@property
def implementation(self):
    return self.cell.implementation

def get_config(self):
    config = {'units': self.units,
              'activation': activations.serialize(self.activation),
              'recurrent_activation': activations.serialize(self.recurrent_activation),
              'use_bias': self.use_bias,
              'kernel_initializer': initializers.serialize(self.kernel_initializer),
              'recurrent_initializer': initializers.serialize(self.recurrent_initializer),
              'bias_initializer': initializers.serialize(self.bias_initializer),
              'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
              'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer),
              'bias_regularizer': regularizers.serialize(self.bias_regularizer),
              'activity_regularizer': regularizers.serialize(self.activity_regularizer),
              'kernel_constraint': constraints.serialize(self.kernel_constraint),
              'recurrent_constraint': constraints.serialize(self.recurrent_constraint),
              'bias_constraint': constraints.serialize(self.bias_constraint),
              'dropout': self.dropout,
              'recurrent_dropout': self.recurrent_dropout,
              'implementation': self.implementation}
    base_config = super(CGRU, self).get_config()
    del base_config['cell']
    return dict(list(base_config.items()) + list(config.items()))

@classmethod
def from_config(cls, config):
    if 'implementation' in config and config['implementation'] == 0:
        config['implementation'] = 1
    return cls(**config)

The model is implemented as follows (note that the embedding out_dim has to equal hidden_size here, since the custom cell adds the inputs directly to the recurrent terms):

    import keras
    from keras import metrics
    from keras.models import Model
    from keras.layers import Input, Dense, Embedding

    user_input = Input(batch_shape=(batch_size,chunk_size,), dtype='int32', name='user_inputs')
    user_emb = Embedding(input_dim=num_users+1, output_dim=out_dim, input_length=chunk_size)(user_input)
    item_input = Input(batch_shape=(batch_size,chunk_size,), dtype='int32', name='item_inputs')
    item_emb = Embedding(input_dim=num_items+1, output_dim=out_dim, input_length=chunk_size)(item_input)
    inputs = keras.layers.add([user_emb, item_emb])

    gru_args = {
        "units":hidden_size,
        "return_sequences":True,
        #"return_state":True,
        "stateful":True,
        "unroll":False
    }
    gru = CGRU(**gru_args)(inputs)
    outputs = Dense(num_items+1, activation='softmax')(gru)

    recc_model = Model(inputs=[user_input, item_input], outputs=outputs)
    recc_model.compile(optimizer='rmsprop',
                       loss='categorical_crossentropy',
                       metrics=[metrics.categorical_accuracy])
                       #metrics=[metrics.sparse_categorical_accuracy])

But when running the code I get the following error, which seems to be caused by the gradient computation returning None:

ValueError: Tried to convert 'x' to a tensor and failed. Error: None values not supported.

Find the full error here: https://pastebin.com/n9UzCRiP

1 Answer:

Answer 0 (score: 1):

The error occurs because the bias weights are added to the model but are never used anywhere.

When you call self.add_weight(...), you have to make sure those weights are actually used somewhere in the model. Otherwise, since the weights are not connected to the loss tensor, TF cannot compute their gradients and this error is raised.
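
The failure mode is not specific to this GRU: any layer that registers a trainable weight and never uses it in call() fails the same way on the TensorFlow backend. A minimal sketch (a hypothetical layer, purely to illustrate the error) looks like this:

from keras.models import Sequential
from keras.layers import Dense
from keras.engine.topology import Layer

class UnusedWeightLayer(Layer):
    def build(self, input_shape):
        # This weight is registered as trainable...
        self.dangling = self.add_weight(name='dangling', shape=(1,),
                                        initializer='zeros')
        super(UnusedWeightLayer, self).build(input_shape)

    def call(self, inputs):
        # ...but never used here, so no gradient reaches it.
        return inputs

model = Sequential([Dense(4, input_shape=(4,)),
                    UnusedWeightLayer(),
                    Dense(1)])
model.compile(optimizer='rmsprop', loss='mse')
# The first call to fit/train_on_batch builds the training function, computes
# the gradients, and raises:
#   ValueError: Tried to convert 'x' to a tensor and failed. Error: None values not supported.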

If you don't need the bias weights, you can either remove the add_weight lines or construct the cell with use_bias=False.

Also, I don't think you need to re-implement a CGRU layer just to use the custom cell. Simply wrap the custom cell in the built-in RNN layer:

gru = RNN(CGRUCell(hidden_size, use_bias=False),
          return_sequences=True,
          stateful=True,
          unroll=False)(inputs)
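
One more usage note: if a model containing this cell is saved with model.save and reloaded later, the custom cell class has to be registered through custom_objects (the file name below is only a placeholder):

from keras.models import load_model

model = load_model('recc_model.h5', custom_objects={'CGRUCell': CGRUCell})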