I am trying to build a hierarchical model for sequence prediction. My input is a sequence (372×49) and my output is also a sequence (372×2). The details of the model are given below.
import numpy as np
import tensorflow as tf

class Local(tf.keras.layers.Layer):
    def __init__(self, latent_dim=128,
                 intermediate_dim=256, name='local', **kwargs):
        super(Local, self).__init__(name=name, **kwargs)
        # Each 49-feature item is treated as a 7x7x1 patch.
        self.cnn = tf.keras.layers.Conv2D(filters=64, kernel_size=(7, 1), padding='valid',
                                          input_shape=(7, 7, 1), activation='relu')
        self.bn = tf.keras.layers.BatchNormalization()
        self.dropout = tf.keras.layers.Dropout(0.2)
        self.reshape = tf.keras.layers.Reshape((7, 64))
        self.lstm = tf.keras.layers.LSTM(200, return_sequences=False, stateful=False)
        self.dense_1 = tf.keras.layers.Dense(intermediate_dim, activation='relu')
        self.dense_2 = tf.keras.layers.Dense(latent_dim, activation='relu')

    def call(self, batch_inputs):
        cnn_bn = self.bn(self.cnn(batch_inputs))         # (batch, 1, 7, 64)
        cnn_drop = self.dropout(cnn_bn)
        lstm_inp = self.reshape(cnn_drop)                # (batch, 7, 64)
        lstm_out = self.lstm(lstm_inp)                   # (batch, 200)
        lstm_drop = self.dropout(lstm_out)
        dense_1 = self.dropout(self.dense_1(lstm_drop))
        local_out = self.dense_2(dense_1)                # (batch, latent_dim)
        return local_out
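As a quick shape check, this is what one Local call looks like on a single reshaped item (a sketch; the variable names are only for illustration):

local = Local()
x = tf.random.normal([1, 7, 7, 1])  # one item, reshaped from its 49 features
z = local(x)
print(z.shape)  # (1, 128): the (7,1) conv gives (1, 7, 64) -> LSTM(200) -> Dense chain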
class Global(tf.keras.layers.Layer):
    def __init__(self, max_subs=372, latent_dim=128):
        super(Global, self).__init__()
        self.max_subs = max_subs
        self.bottom_inp_dim = latent_dim
        self.mask = tf.keras.layers.Masking(mask_value=-1,
                                            input_shape=(self.max_subs, self.bottom_inp_dim))
        self.rnn = tf.keras.layers.LSTM(128, return_sequences=True)
        self.dense_1 = tf.keras.layers.Dense(64, activation='relu')  # tf.keras.layers.BatchNormalization(),
        self.dense_2 = tf.keras.layers.Dense(32, activation='relu')
        self.out = tf.keras.layers.Dense(2, activation='sigmoid')

    def call(self, batch_inputs):
        x = batch_inputs
        mask = self.mask(x)           # mask padded items (value -1)
        h = self.rnn(mask)            # (batch, max_subs, 128)
        h_d_1 = self.dense_1(h)
        h_d_2 = self.dense_2(h_d_1)
        y = self.out(h_d_2)           # (batch, max_subs, 2)
        return y
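Similarly, Global maps a sequence of Local embeddings to one prediction per item (again a sketch with illustrative names):

glob = Global()
z_seq = tf.random.normal([1, 372, 128])  # one sequence of Local embeddings
y = glob(z_seq)
print(y.shape)  # (1, 372, 2): a 2-dim sigmoid output for every item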
class Seq(tf.keras.Model):
    def __init__(self, latent_dim=128, name='seq', **kwargs):
        super(Seq, self).__init__(name=name, **kwargs)
        self.local_out_dim = latent_dim
        self.local = Local()
        self.pred = Global()

    def call(self, batch_inputs):
        # Collect the per-item Local embeddings into NumPy buffers,
        # then feed each sequence of embeddings to Global.
        batch_output = np.zeros((batch_inputs.shape[0], batch_inputs.shape[1], 2))
        for j in range(0, batch_inputs.shape[0]):
            temp = np.zeros((batch_inputs.shape[1], self.local_out_dim))
            for i in range(0, batch_inputs.shape[1]):
                inp = batch_inputs[j, i, :]
                inp = tf.reshape(inp, [1, 7, 7, 1])
                temp[i, :] = self.local(inp)
            seqinp = tf.reshape(temp, [1, batch_inputs.shape[1], self.local_out_dim])
            batch_output[j, :, :] = self.pred(seqinp)
        return batch_output
The Local layer extracts the local dependencies within each input item, and the Global layer learns the sequential dependencies across the items of a given sequence.
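End to end, the forward pass itself runs and produces the intended shapes (a sketch with random data; the model instance here is only for illustration):

model = Seq()
x = tf.random.normal([4, 372, 49])
y = model(x)
print(y.shape)  # (4, 372, 2): one (2,) prediction per item in each sequence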
t = np.random.random(size=(128, 372, 49))   # dummy inputs
te = np.random.random(size=(128, 372, 2))   # dummy targets
seq = Seq()
train_dataset = tf.data.Dataset.from_tensor_slices((t, te))
train_dataset = train_dataset.shuffle(buffer_size=128).batch(4)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
mse_loss_fn = tf.keras.losses.MeanSquaredError()
loss_metric = tf.keras.metrics.Mean()
for epoch in range(3):
    print('Start of epoch %d' % (epoch,))
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            pred = seq(x_batch_train)
            loss = mse_loss_fn(y_batch_train, pred)
        grads = tape.gradient(loss, seq.trainable_variables)
        optimizer.apply_gradients(zip(grads, seq.trainable_variables))
        loss_metric(loss)
        if step % 100 == 0:
            print('step %s: mean loss = %s' % (step, loss_metric.result()))
When optimizing the loss, the gradients come back as None, and the error is:
---> 32 optimizer.apply_gradients(zip(grads, seq.trainable_variables))
33 loss_metric(loss)
34
~\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\keras\optimizer_v2\optimizer_v2.py in apply_gradients(self, grads_and_vars, name, experimental_aggregate_gradients)
511 ValueError: If none of the variables have gradients.
512 """
--> 513 grads_and_vars = _filter_grads(grads_and_vars)
514 var_list = [v for (_, v) in grads_and_vars]
515
~\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\keras\optimizer_v2\optimizer_v2.py in _filter_grads(grads_and_vars)
1269 if not filtered:
1270 raise ValueError("No gradients provided for any variable: %s." %
-> 1271 ([v.name for _, v in grads_and_vars],))
1272 if vars_with_empty_grads:
1273 logging.warning(
ValueError: No gradients provided for any variable: ['seq/local/conv2d_52/kernel:0', 'seq/local/conv2d_52/bias:0', 'seq/local/batch_normalization_52/gamma:0', 'seq/local/batch_normalization_52/beta:0', 'seq/local/lstm_97/lstm_cell_97/kernel:0', 'seq/local/lstm_97/lstm_cell_97/recurrent_kernel:0', 'seq/local/lstm_97/lstm_cell_97/bias:0', 'seq/local/dense_244/kernel:0', 'seq/local/dense_244/bias:0', 'seq/local/dense_245/kernel:0', 'seq/local/dense_245/bias:0', 'seq/global_45/lstm_98/lstm_cell_98/kernel:0', 'seq/global_45/lstm_98/lstm_cell_98/recurrent_kernel:0', 'seq/global_45/lstm_98/lstm_cell_98/bias:0', 'seq/global_45/dense_246/kernel:0', 'seq/global_45/dense_246/bias:0', 'seq/global_45/dense_247/kernel:0', 'seq/global_45/dense_247/bias:0', 'seq/global_45/dense_248/kernel:0', 'seq/global_45/dense_248/bias:0'].
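For reference on where the tape likely loses track: Seq.call writes the Local and Global outputs into np.zeros buffers, and item assignment into a NumPy array leaves the TensorFlow graph, so nothing connects the loss back to the variables and tape.gradient returns None for all of them. Below is a minimal sketch of a tape-friendly call (SeqTF is a hypothetical name; it assumes the per-item reshape to (7, 7, 1) in the original code is the intended layout), which folds the sequence axis into the batch axis and stays in TF ops throughout:

class SeqTF(tf.keras.Model):
    def __init__(self, latent_dim=128, name='seq_tf', **kwargs):
        super(SeqTF, self).__init__(name=name, **kwargs)
        self.local_out_dim = latent_dim
        self.local = Local()
        self.pred = Global()

    def call(self, batch_inputs):
        b = tf.shape(batch_inputs)[0]
        n = tf.shape(batch_inputs)[1]
        # (batch, 372, 49) -> (batch*372, 7, 7, 1): Local processes every
        # item in one differentiable call instead of a Python loop.
        items = tf.reshape(batch_inputs, [-1, 7, 7, 1])
        local_out = self.local(items)                # (batch*372, latent_dim)
        seqinp = tf.reshape(local_out, [b, n, self.local_out_dim])
        return self.pred(seqinp)                     # (batch, 372, 2)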