在Keras中使用model.save()时出现NotImplementedError
我正在尝试使用Keras来实现简单的编码器-解码器模型。我没有使用Sequential()。我尝试了Model Subclass。我成功地训练了模型,但无法保存。
最初,我没有为模型的第一层添加input_shape
。但是,当我将其添加到模型中时,发生了相同的错误。而且我不知道如何保存它。
import tensorflow as tf
import numpy as np
import time
import matplotlib.pyplot as plt
import caption_preprocessing
import train_dataset_preparation
tf.enable_eager_execution()
# some parameters
voc_size = len(caption_preprocessing.tokenizer.word_index) # voc_size = 10001
embedding_output_dim = 256
lstm_output_dim = 512
encoder_output_dim = embedding_output_dim
EPOCHS = 1
# model definition
class EncoderPart(tf.keras.Model):
def __init__(self):
super(EncoderPart, self).__init__()
self.fc = tf.keras.layers.Dense(input_shape=(64, 2048), units=encoder_output_dim)
def call(self, inputs, training=None, mask=None):
y = self.fc(inputs)
output = tf.nn.relu(y)
return output
class EmbeddingPart(tf.keras.Model):
def __init__(self):
super(EmbeddingPart, self).__init__()
self.embedding = tf.keras.layers.Embedding(input_shape=(1,),
input_dim=voc_size,
output_dim=embedding_output_dim)
def call(self, inputs, training=None, mask=None):
output = self.embedding(inputs)
return output
class DecoderPart(tf.keras.Model):
def __init__(self):
super(DecoderPart, self).__init__()
self.lstm = tf.keras.layers.CuDNNLSTM(input_shape=(64, embedding_output_dim),
units=lstm_output_dim,
return_sequences=True,
return_state=True,
recurrent_initializer='glorot_uniform')
self.fc = tf.keras.layers.Dense(units=voc_size)
def call(self, inputs, training=None, mask=None):
y, state_h, state_c = self.lstm(inputs)
output = self.fc(y)
return output, state_c
def reset_states(self, batch_size):
return tf.zeros((batch_size, lstm_output_dim))
def _loss_function(real, pred):
mask = 1 - np.equal(real, 0) # ignore the loss of padding words
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=real, logits=pred) * mask
return tf.reduce_mean(loss)
if __name__ == '__main__':
# training model
encoder = EncoderPart()
embedding = EmbeddingPart()
decoder = DecoderPart()
optimizer = tf.train.AdamOptimizer()
loss_plot = []
time_per_epoch = time.time()
for epoch in range(EPOCHS):
start_time = time.time()
total_loss = 0
for (batch, (image_feature_vec, caption)) in enumerate(train_dataset_preparation.dataset):
# print(batch+1, caption.shape)
batch_loss = 0
decoder.reset_states(caption.shape[0]) # reset the states of the lstm layer
start_vec = [caption_preprocessing.tokenizer.word_index['<start>']] * caption.shape[0]
dec_input = tf.expand_dims(start_vec, 1) # shape=(BATCH_SIZE, 1)
with tf.GradientTape() as tape:
image_feature = encoder(image_feature_vec)
_, state = decoder(image_feature) # _.shape=(BATCH_SIZE, 64, 10001), state.shape=(BATCH_SIZE, 512)
for i in range(1, caption.shape[1]):
word_embedding = embedding(dec_input) # shape=(BATCH_SIZE, 1, embedding_output_dim)
predictions, _ = decoder(word_embedding) # shape=(BATCH_SIZE, 1, voc_size)
predictions = tf.reshape(predictions, (caption.shape[0], -1)) # shape=(BATCH_SIZE, embedding_output_dim)
batch_loss += _loss_function(caption[:, i], predictions)
dec_input = tf.expand_dims(caption[:, i], 1)
total_loss += (batch_loss / int(caption.shape[1]))
variables = encoder.variables + embedding.variables + decoder.variables
gradients = tape.gradient(batch_loss, variables)
optimizer.apply_gradients(zip(gradients, variables), tf.train.get_or_create_global_step())
if (batch + 1) % 100 == 0:
print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
batch + 1,
batch_loss.numpy() / int(caption.shape[1])))
loss_plot.append(total_loss / len(caption_preprocessing.train_cap_vec))
print('Epoch {} Loss {:.6f}'.format(epoch + 1,
total_loss / len(caption_preprocessing.train_cap_vec)))
print('Time taken for 1 epoch {} sec\n'.format(time.time() - start_time))
time_per_epoch = (time.time() - time_per_epoch) / EPOCHS
# loss plot
plt.plot(loss_plot)
plt.xlabel('epochs')
plt.ylabel('loss')
plt.title('loss plot')
plt.show()
# save model
save_path = r'\20190326'
encoder _save_file = r'C:\Users\LZ_Jaja\PycharmProjects\Show_and_Tell_190314\model_and_eval' + save_path + r'\encoder.h5'
tf.keras.models.save_model(encoder, embedding_w_save_file)
我尝试了model.save()
和tf.keras.models.save_model()
。他们两个都提出相同的错误。错误如下:
回溯(最近通话最近): 文件“ C:/Users/LZ_Jaja/PycharmProjects/Show_and_Tell_190314/model_train_and_save.py”,第138行,在 tf.keras.models.save_model(编码器,embedding_w_save_file) 第101行的文件“ C:\ Users \ LZ_Jaja \ Anaconda3 \ envs \ tensorflow_gpu \ lib \ site-packages \ tensorflow \ python \ keras \ engine \ saving.py”在save_model中 'config':model.get_config() 第1111行,位于get_config中的文件“ C:\ Users \ LZ_Jaja \ Anaconda3 \ envs \ tensorflow_gpu \ lib \ site-packages \ tensorflow \ python \ keras \ engine \ network.py” 引发NotImplementedError NotImplementedError
我也尝试分别保存体系结构和权重,但是在使用model.to_yaml()
时也会引发错误。
model.save_weights()
有效。但是当使用model.load_weights()
加载权重时,会引发一个错误,说我试图用1层权重填充0层(我忘记了确切的错误信息)。
所以我需要一些帮助。如何保存模型并正确加载?