我正在尝试在keras 2.1.2-py36_0中实现自定义GRU层,我想使用以下门方程式:
z t = act(W z .h t-1 + x t )
r t = act(W r .h t-1 + x t )
h t = act(W h 。(r * h t-1 )+ x t )
而不是keras当前实现的门如下:
z t = act(W z .h t-1 + U z x t )
r t = act(W r .h t-1 + U r x t )
h t = act(W h 。(r * h t-1 )+ U h x t )
为数据自定义GRU单元
类CGRUCell(Layer):
def __init__(self, units,
activation='tanh',
recurrent_activation='hard_sigmoid',
use_bias=True,
kernel_initializer='glorot_uniform',
recurrent_initializer='orthogonal',
bias_initializer='zeros',
kernel_regularizer=None,
recurrent_regularizer=None,
bias_regularizer=None,
kernel_constraint=None,
recurrent_constraint=None,
bias_constraint=None,
dropout=0.,
recurrent_dropout=0.,
implementation=1,
**kwargs):
super(CGRUCell, self).__init__(**kwargs)
self.units = units
self.activation = activations.get(activation)
self.recurrent_activation = activations.get(recurrent_activation)
self.use_bias = use_bias
self.kernel_initializer = initializers.get(kernel_initializer)
self.recurrent_initializer = initializers.get(recurrent_initializer)
self.bias_initializer = initializers.get(bias_initializer)
self.kernel_regularizer = regularizers.get(kernel_regularizer)
self.recurrent_regularizer = regularizers.get(recurrent_regularizer)
self.bias_regularizer = regularizers.get(bias_regularizer)
self.kernel_constraint = constraints.get(kernel_constraint)
self.recurrent_constraint = constraints.get(recurrent_constraint)
self.bias_constraint = constraints.get(bias_constraint)
self.dropout = min(1., max(0., dropout))
self.recurrent_dropout = min(1., max(0., recurrent_dropout))
self.implementation = implementation
self.state_size = self.units
self._dropout_mask = None
self._recurrent_dropout_mask = None
def build(self, input_shape):
input_dim = input_shape[-1]
#self.kernel = self.add_weight(shape=(input_dim, self.units * 3),
# name='kernel',
# initializer=self.kernel_initializer,
# regularizer=self.kernel_regularizer,
# constraint=self.kernel_constraint)
self.recurrent_kernel = self.add_weight(
shape=(self.units, self.units * 3),
name='recurrent_kernel',
initializer=self.recurrent_initializer,
regularizer=self.recurrent_regularizer,
constraint=self.recurrent_constraint)
if self.use_bias:
self.bias = self.add_weight(shape=(self.units * 3,),
name='bias',
initializer=self.bias_initializer,
regularizer=self.bias_regularizer,
constraint=self.bias_constraint)
else:
self.bias = None
#self.kernel_z = self.kernel[:, :self.units]
self.recurrent_kernel_z = self.recurrent_kernel[:, :self.units]
#self.kernel_r = self.kernel[:, self.units: self.units * 2]
self.recurrent_kernel_r = self.recurrent_kernel[:,
self.units:
self.units * 2]
#self.kernel_h = self.kernel[:, self.units * 2:]
self.recurrent_kernel_h = self.recurrent_kernel[:, self.units * 2:]
if self.use_bias:
self.bias_z = self.bias[:self.units]
self.bias_r = self.bias[self.units: self.units * 2]
self.bias_h = self.bias[self.units * 2:]
else:
self.bias_z = None
self.bias_r = None
self.bias_h = None
self.built = True
def call(self, inputs, states, training=None):
h_tm1 = states[0] # previous memory
if 0 < self.dropout < 1 and self._dropout_mask is None:
self._dropout_mask = _generate_dropout_mask(
_generate_dropout_ones(inputs, K.shape(inputs)[-1]),
self.dropout,
training=training,
count=3)
if (0 < self.recurrent_dropout < 1 and
self._recurrent_dropout_mask is None):
self._recurrent_dropout_mask = _generate_dropout_mask(
_generate_dropout_ones(inputs, self.units),
self.recurrent_dropout,
training=training,
count=3)
# dropout matrices for input units
dp_mask = self._dropout_mask
# dropout matrices for recurrent units
rec_dp_mask = self._recurrent_dropout_mask
if self.implementation == 1:
if 0. < self.dropout < 1.:
inputs_z = inputs * dp_mask[0]
inputs_r = inputs * dp_mask[1]
inputs_h = inputs * dp_mask[2]
else:
inputs_z = inputs
inputs_r = inputs
inputs_h = inputs
print(inputs)
# Custom implementation of inputs which are already embedding parameters
#x_z = K.dot(inputs_z, self.kernel_z)
#x_r = K.dot(inputs_r, self.kernel_r)
#x_h = K.dot(inputs_h, self.kernel_h)
#if self.use_bias:
# x_z = K.bias_add(x_z, self.bias_z)
# x_r = K.bias_add(x_r, self.bias_r)
# x_h = K.bias_add(x_h, self.bias_h)
x_z = inputs_z
x_r = inputs_r
x_h = inputs_h
if 0. < self.recurrent_dropout < 1.:
h_tm1_z = h_tm1 * rec_dp_mask[0]
h_tm1_r = h_tm1 * rec_dp_mask[1]
h_tm1_h = h_tm1 * rec_dp_mask[2]
else:
h_tm1_z = h_tm1
h_tm1_r = h_tm1
h_tm1_h = h_tm1
z = self.recurrent_activation(x_z + K.dot(h_tm1_z,
self.recurrent_kernel_z))
r = self.recurrent_activation(x_r + K.dot(h_tm1_r,
self.recurrent_kernel_r))
hh = self.activation(x_h + K.dot(r * h_tm1_h,
self.recurrent_kernel_h))
else:
if 0. < self.dropout < 1.:
inputs *= dp_mask[0]
# Custom implementation of inputs which are already embedding parameters
#matrix_x = K.dot(inputs, self.kernel)
#if self.use_bias:
# matrix_x = K.bias_add(matrix_x, self.bias)
matrix_x = inputs
if 0. < self.recurrent_dropout < 1.:
h_tm1 *= rec_dp_mask[0]
matrix_inner = K.dot(h_tm1,
self.recurrent_kernel[:, :2 * self.units])
x_z = matrix_x[:, :self.units]
x_r = matrix_x[:, self.units: 2 * self.units]
recurrent_z = matrix_inner[:, :self.units]
recurrent_r = matrix_inner[:, self.units: 2 * self.units]
z = self.recurrent_activation(x_z + recurrent_z)
r = self.recurrent_activation(x_r + recurrent_r)
x_h = matrix_x[:, 2 * self.units:]
recurrent_h = K.dot(r * h_tm1,
self.recurrent_kernel[:, 2 * self.units:])
hh = self.activation(x_h + recurrent_h)
h = z * h_tm1 + (1 - z) * hh
if 0 < self.dropout + self.recurrent_dropout:
if training is None:
h._uses_learning_phase = True
return h, [h]
def get_config(self):
config = {'units': self.units,
'activation': activations.serialize(self.activation),
'recurrent_activation': activations.serialize(self.recurrent_activation),
'use_bias': self.use_bias,
'kernel_initializer': initializers.serialize(self.kernel_initializer),
'recurrent_initializer': initializers.serialize(self.recurrent_initializer),
'bias_initializer': initializers.serialize(self.bias_initializer),
'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer),
'bias_regularizer': regularizers.serialize(self.bias_regularizer),
'kernel_constraint': constraints.serialize(self.kernel_constraint),
'recurrent_constraint': constraints.serialize(self.recurrent_constraint),
'bias_constraint': constraints.serialize(self.bias_constraint),
'dropout': self.dropout,
'recurrent_dropout': self.recurrent_dropout,
'implementation': self.implementation}
base_config = super(CGRUCell, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
CGRU类(RNN):
@interfaces.legacy_recurrent_support
def __init__(self, units,
activation='tanh',
recurrent_activation='hard_sigmoid',
use_bias=True,
kernel_initializer='glorot_uniform',
recurrent_initializer='orthogonal',
bias_initializer='zeros',
kernel_regularizer=None,
recurrent_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
kernel_constraint=None,
recurrent_constraint=None,
bias_constraint=None,
dropout=0.,
recurrent_dropout=0.,
implementation=1,
return_sequences=False,
return_state=False,
go_backwards=False,
stateful=False,
unroll=False,
**kwargs):
if implementation == 0:
warnings.warn('`implementation=0` has been deprecated, '
'and now defaults to `implementation=1`.'
'Please update your layer call.')
cell = CGRUCell(units,
activation=activation,
recurrent_activation=recurrent_activation,
use_bias=use_bias,
kernel_initializer=kernel_initializer,
recurrent_initializer=recurrent_initializer,
bias_initializer=bias_initializer,
kernel_regularizer=kernel_regularizer,
recurrent_regularizer=recurrent_regularizer,
bias_regularizer=bias_regularizer,
kernel_constraint=kernel_constraint,
recurrent_constraint=recurrent_constraint,
bias_constraint=bias_constraint,
dropout=dropout,
recurrent_dropout=recurrent_dropout,
implementation=implementation)
super(CGRU, self).__init__(cell,
return_sequences=return_sequences,
return_state=return_state,
go_backwards=go_backwards,
stateful=stateful,
unroll=unroll,
**kwargs)
self.activity_regularizer = regularizers.get(activity_regularizer)
def call(self, inputs, mask=None, training=None, initial_state=None):
self.cell._dropout_mask = None
self.cell._recurrent_dropout_mask = None
return super(CGRU, self).call(inputs,
mask=mask,
training=training,
initial_state=initial_state)
@property
def units(self):
return self.cell.units
@property
def activation(self):
return self.cell.activation
@property
def recurrent_activation(self):
return self.cell.recurrent_activation
@property
def use_bias(self):
return self.cell.use_bias
@property
def kernel_initializer(self):
return self.cell.kernel_initializer
@property
def recurrent_initializer(self):
return self.cell.recurrent_initializer
@property
def bias_initializer(self):
return self.cell.bias_initializer
@property
def kernel_regularizer(self):
return self.cell.kernel_regularizer
@property
def recurrent_regularizer(self):
return self.cell.recurrent_regularizer
@property
def bias_regularizer(self):
return self.cell.bias_regularizer
@property
def kernel_constraint(self):
return self.cell.kernel_constraint
@property
def recurrent_constraint(self):
return self.cell.recurrent_constraint
@property
def bias_constraint(self):
return self.cell.bias_constraint
@property
def dropout(self):
return self.cell.dropout
@property
def recurrent_dropout(self):
return self.cell.recurrent_dropout
@property
def implementation(self):
return self.cell.implementation
def get_config(self):
config = {'units': self.units,
'activation': activations.serialize(self.activation),
'recurrent_activation': activations.serialize(self.recurrent_activation),
'use_bias': self.use_bias,
'kernel_initializer': initializers.serialize(self.kernel_initializer),
'recurrent_initializer': initializers.serialize(self.recurrent_initializer),
'bias_initializer': initializers.serialize(self.bias_initializer),
'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer),
'bias_regularizer': regularizers.serialize(self.bias_regularizer),
'activity_regularizer': regularizers.serialize(self.activity_regularizer),
'kernel_constraint': constraints.serialize(self.kernel_constraint),
'recurrent_constraint': constraints.serialize(self.recurrent_constraint),
'bias_constraint': constraints.serialize(self.bias_constraint),
'dropout': self.dropout,
'recurrent_dropout': self.recurrent_dropout,
'implementation': self.implementation}
base_config = super(CGRU, self).get_config()
del base_config['cell']
return dict(list(base_config.items()) + list(config.items()))
@classmethod
def from_config(cls, config):
if 'implementation' in config and config['implementation'] == 0:
config['implementation'] = 1
return cls(**config)
模型实现如下:
user_input = Input(batch_shape=(batch_size,chunk_size,), dtype='int32', name='user_inputs')
user_emb = Embedding(input_dim=num_users+1, output_dim=out_dim, input_length=chunk_size)(user_input)
item_input = Input(batch_shape=(batch_size,chunk_size,), dtype='int32', name='item_inputs')
item_emb = Embedding(input_dim=num_items+1, output_dim=out_dim, input_length=chunk_size)(item_input)
inputs = keras.layers.add([user_emb, item_emb])
gru_args = {
"units":hidden_size,
"return_sequences":True,
#"return_state":True,
"stateful":True,
"unroll":False
}
gru = CGRU(**gru_args)(inputs)
outputs = Dense(num_items+1, activation='softmax')(gru)
[recc_model = Model(inputs=\[user_input,item_input\], outputs=outputs)
recc_model.compile(optimizer='rmsprop',
loss='categorical_crossentropy',
metrics=\[metrics.cate][1]gorical_accuracy])
#metrics=[metrics.sparse_categorical_accuracy])
但是在运行代码时我得到以下错误,这似乎是由于渐变计算到无:
ValueError: Tried to convert 'x' to a tensor and failed. Error: None values not supported.
在此处查找完整错误:https://pastebin.com/n9UzCRiP
答案 0 :(得分:1)
发生错误是因为偏差权重已添加到模型中但未在任何地方使用。
当您致电self.add_weight(...)
时,您必须确保在模型中的某处使用这些权重。否则,由于这些权重没有连接到损失张量,TF无法计算梯度,并且会引发错误。
如果您不需要偏差权重,则可以删除add_weight
行,或在单元格中设置use_bias=False
。
另外,我认为您不需要重新实现CGRU
图层来使用自定义单元格。只需使用内置的RNN
图层包装自定义单元格即可。
gru = RNN(CGRUCell(hidden_size, use_bias=False),
return_sequences=True,
stateful=True,
unroll=False)(inputs)