Question

在 Keras 中使用 model.save() 时出现 NotImplementedError

我正在尝试用 Keras 实现一个简单的编码器-解码器模型。我没有使用 Sequential()，而是采用了模型子类化（Model subclassing）的方式。模型训练成功了，但无法保存。起初我没有给模型的第一层指定 input_shape；后来加上之后，仍然出现同样的错误。我不知道该如何保存这个模型。

import tensorflow as tf

import numpy as np
import time
import matplotlib.pyplot as plt

import caption_preprocessing
import train_dataset_preparation

# Switch TensorFlow to eager execution; the training loop below relies on it
# (for example it calls `.numpy()` on the batch loss).
tf.enable_eager_execution()

# Hyper-parameters shared by the model classes and the training loop.
voc_size = len(caption_preprocessing.tokenizer.word_index)  # number of tokenizer entries (10001 in the author's run)
embedding_output_dim = 256  # width of each word embedding
lstm_output_dim = 512  # number of LSTM units in the decoder
# The encoder output is fed to the same decoder as the word embeddings,
# so it is given the same width.
encoder_output_dim = embedding_output_dim

# Number of passes over the training dataset.
EPOCHS = 1


# model definition
class EncoderPart(tf.keras.Model):
    """Project image features to ``encoder_output_dim`` and apply a ReLU."""

    def __init__(self):
        super().__init__()
        # Declared input shape is (64, 2048) -- presumably 64 image locations
        # with 2048 features each; confirm against the dataset pipeline.
        self.fc = tf.keras.layers.Dense(units=encoder_output_dim,
                                        input_shape=(64, 2048))

    def call(self, inputs, training=None, mask=None):
        """Return ``relu(fc(inputs))``.

        ``training`` and ``mask`` are accepted but not used.
        """
        return tf.nn.relu(self.fc(inputs))


class EmbeddingPart(tf.keras.Model):
    """Map word ids to vectors of width ``embedding_output_dim``."""

    def __init__(self):
        super().__init__()
        # The lookup table has `voc_size` rows, each of width
        # `embedding_output_dim`.
        self.embedding = tf.keras.layers.Embedding(
            input_dim=voc_size,
            output_dim=embedding_output_dim,
            input_shape=(1,),
        )

    def call(self, inputs, training=None, mask=None):
        """Look up the embedding of every id in ``inputs``.

        ``training`` and ``mask`` are accepted but not used.
        """
        return self.embedding(inputs)


class DecoderPart(tf.keras.Model):
    """CuDNN LSTM followed by a dense layer with one output per vocabulary entry."""

    def __init__(self):
        super().__init__()
        lstm_options = dict(
            input_shape=(64, embedding_output_dim),
            units=lstm_output_dim,
            return_sequences=True,
            return_state=True,
            recurrent_initializer='glorot_uniform',
        )
        self.lstm = tf.keras.layers.CuDNNLSTM(**lstm_options)
        self.fc = tf.keras.layers.Dense(units=voc_size)

    def call(self, inputs, training=None, mask=None):
        """Run the LSTM over ``inputs`` and score every time step.

        Returns a pair ``(scores, cell_state)``: the dense layer applied to
        the LSTM's per-step outputs, and the LSTM's final cell state. The
        final hidden state is discarded. ``training`` and ``mask`` are
        accepted but not used.
        """
        sequence_out, _hidden_state, cell_state = self.lstm(inputs)
        scores = self.fc(sequence_out)
        return scores, cell_state

    def reset_states(self, batch_size):
        """Return a zero tensor of shape ``(batch_size, lstm_output_dim)``.

        Nothing on the model itself is modified by this method.
        """
        zero_state_shape = (batch_size, lstm_output_dim)
        return tf.zeros(zero_state_shape)


def _loss_function(real, pred):
    """Return the mean cross-entropy with padding targets zeroed out.

    Args:
        real: integer target ids; id 0 is treated as padding.
        pred: unnormalised logits, one row of class scores per target id.

    Returns:
        A scalar tensor. The mean is taken over every entry of ``real``;
        padded entries contribute a loss of zero but still count in the
        denominator.
    """
    per_target = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=real, logits=pred)
    # Build the mask with TensorFlow ops so the function does not depend on
    # converting a tensor to a NumPy array (which only works eagerly).
    keep = tf.cast(tf.not_equal(real, 0), per_target.dtype)
    return tf.reduce_mean(per_target * keep)


if __name__ == '__main__':
    # Build the three sub-models and the optimizer that updates all of them.
    encoder = EncoderPart()
    embedding = EmbeddingPart()
    decoder = DecoderPart()

    optimizer = tf.train.AdamOptimizer()

    # One averaged loss value per epoch; plotted after training.
    loss_plot = []

    # Wall-clock start of training; turned into a per-epoch average afterwards.
    time_per_epoch = time.time()
    for epoch in range(EPOCHS):
        start_time = time.time()
        total_loss = 0

        for (batch, (image_feature_vec, caption)) in enumerate(train_dataset_preparation.dataset):
            batch_loss = 0
            # NOTE(review): DecoderPart.reset_states only returns a zero tensor
            # and the result is discarded here, so this call changes no state.
            decoder.reset_states(caption.shape[0])
            start_vec = [caption_preprocessing.tokenizer.word_index['<start>']] * caption.shape[0]
            dec_input = tf.expand_dims(start_vec, 1)    # shape=(BATCH_SIZE, 1)

            with tf.GradientTape() as tape:
                image_feature = encoder(image_feature_vec)
                # NOTE(review): `state` is never read below and the decoder is
                # called without an initial state, so the image features do not
                # appear to influence the word-by-word decoding -- confirm intent.
                _, state = decoder(image_feature)   # _.shape=(BATCH_SIZE, 64, 10001), state.shape=(BATCH_SIZE, 512)

                # Teacher forcing: the ground-truth word at step i is both the
                # target of this step and the input of the next one.
                for i in range(1, caption.shape[1]):
                    word_embedding = embedding(dec_input)   # shape=(BATCH_SIZE, 1, embedding_output_dim)
                    predictions, _ = decoder(word_embedding)    # shape=(BATCH_SIZE, 1, voc_size)
                    predictions = tf.reshape(predictions, (caption.shape[0], -1))   # shape=(BATCH_SIZE, voc_size)
                    batch_loss += _loss_function(caption[:, i], predictions)

                    dec_input = tf.expand_dims(caption[:, i], 1)

            # Accumulate the batch loss averaged over the caption length.
            total_loss += (batch_loss / int(caption.shape[1]))

            variables = encoder.variables + embedding.variables + decoder.variables
            gradients = tape.gradient(batch_loss, variables)
            optimizer.apply_gradients(zip(gradients, variables), tf.train.get_or_create_global_step())

            if (batch + 1) % 100 == 0:
                print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                             batch + 1,
                                                             batch_loss.numpy() / int(caption.shape[1])))

        # Per-epoch summary. This sits inside the epoch loop so that every
        # epoch records its own loss and its own elapsed time.
        epoch_loss = total_loss / len(caption_preprocessing.train_cap_vec)
        loss_plot.append(epoch_loss)
        print('Epoch {} Loss {:.6f}'.format(epoch + 1, epoch_loss))
        print('Time taken for 1 epoch {} sec\n'.format(time.time() - start_time))

if __name__ == '__main__':
    # These steps use names that only exist after the training section has
    # run as a script (time_per_epoch, loss_plot, encoder), so they are
    # guarded the same way.

    # Average wall-clock seconds per epoch.
    time_per_epoch = (time.time() - time_per_epoch) / EPOCHS

    # loss plot
    plt.plot(loss_plot)
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.title('loss plot')
    plt.show()

    # save model
    save_path = r'\20190326'

    encoder_save_file = r'C:\Users\LZ_Jaja\PycharmProjects\Show_and_Tell_190314\model_and_eval' + save_path + r'\encoder.h5'

    # NOTE(review): as reported for this script, this call fails with
    # NotImplementedError raised from `model.get_config()`, while
    # `save_weights()` is reported to succeed. TODO: confirm the supported
    # way to persist a subclassed model for the installed TensorFlow version.
    tf.keras.models.save_model(encoder, encoder_save_file)

我尝试了 model.save() 和 tf.keras.models.save_model()，两者都引发相同的错误。错误信息如下：

Traceback (most recent call last):
  File "C:/Users/LZ_Jaja/PycharmProjects/Show_and_Tell_190314/model_train_and_save.py", line 138, in <module>
    tf.keras.models.save_model(encoder, embedding_w_save_file)
  File "C:\Users\LZ_Jaja\Anaconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\keras\engine\saving.py", line 101, in save_model
    'config': model.get_config()
  File "C:\Users\LZ_Jaja\Anaconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\keras\engine\network.py", line 1111, in get_config
    raise NotImplementedError
NotImplementedError

我也尝试过分别保存模型结构和权重，但调用 model.to_yaml() 时同样会引发错误。

model.save_weights() 可以正常工作。但是用 model.load_weights() 加载权重时会报错，提示我试图把 1 层的权重加载到一个只有 0 层的模型中（确切的错误信息我记不清了）。

所以我需要一些帮助。如何保存模型并正确加载？

在 Keras 中使用 model.save() 时出现 NotImplementedError

0 个答案: