I am trying to save and load the CNN encoder and RNN decoder from the TF tutorial on image captioning (https://www.tensorflow.org/tutorials/text/image_captioning). Since these are subclassed Keras models rather than functional or sequential models, I cannot use model.save and model.load directly. Instead, I have to use model.save_weights and model.load_weights. The problem is that model.load_weights can only be called after model.build, and model.build requires its input_shape argument to be a tuple rather than a list of tuples. Our RNN decoder, however, has multiple inputs, and the Keras documentation states that model.build cannot be called with multiple inputs.
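For reference, a minimal sketch of the pattern I am after (the checkpoint paths are placeholders):

encoder.save_weights("./checkpoints/encoder_ckpt")  # fine after training
decoder.save_weights("./checkpoints/decoder_ckpt")

# ...and later, in a separate inference script:
encoder = CNN_Encoder(embedding_dim)
decoder = RNN_Decoder(embedding_dim, units, vocab_size)
encoder.load_weights("./checkpoints/encoder_ckpt")
# decoder.load_weights() wants a built model, but decoder.build() accepts only
# a single input_shape tuple, while the decoder's call() takes three inputs
# (x, features, hidden).
decoder.load_weights("./checkpoints/decoder_ckpt")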
Is there any other way to load the models?
Ultimately, I would like a small Python script that loads the model weights and runs inference. The script does not need to do any training.
Colab: https://colab.research.google.com/drive/12YtCH2X0pwIBBXPW0TXmeA520MyVv9AF
Answer 0: (score: 1)
Here is how I managed to work around this. Not a pretty solution, but it works!
First, save every weight matrix to a .npy file:
# Dump every weight tensor of the trained models to disk.
# The encoder_layer_weights/ and decoder_layer_weights/ directories must already exist.
for i, layer in enumerate(encoder.layers):
    print("Layer %s" % i, layer.name)
    for j, w in enumerate(layer.weights):
        print(w.shape)
        np.save("encoder_layer_weights/layer_%s_%s_weights_%s.npy" % (i, layer.name, j), w.numpy())

for i, layer in enumerate(decoder.layers):
    print("Layer %s" % i, layer.name)
    for j, w in enumerate(layer.weights):
        print(w.shape)
        np.save("decoder_layer_weights/layer_%s_%s_weights_%s.npy" % (i, layer.name, j), w.numpy())
Then you recreate the subclassed models, but this time you pass an initializer for every weight in every layer. This has to be done carefully, because if the shapes don't match, the model will fail to build.
class CNN_Encoder(tf.keras.Model):
    # Since you have already extracted the features and dumped it using pickle
    # This encoder passes those features through a Fully connected layer
    def __init__(self, embedding_dim):
        super(CNN_Encoder, self).__init__()
        # shape after fc == (batch_size, 64, embedding_dim)
        C = tf.keras.initializers.Constant
        w1, w2 = [np.load("encoder_layer_weights/layer_%s_%s_weights_%s.npy" % (0, "dense", j))
                  for j in range(2)]
        self.fc = tf.keras.layers.Dense(embedding_dim, kernel_initializer=C(w1), bias_initializer=C(w2))

    def call(self, x):
        x = self.fc(x)
        x = tf.nn.relu(x)
        return x
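You can sanity-check the rebuilt encoder with a dummy batch. A sketch, assuming the tutorial's InceptionV3 features of shape (batch_size, 64, 2048) and embedding_dim = 256; a shape mismatch in the saved weights will surface here, on the first call:

encoder = CNN_Encoder(embedding_dim=256)
dummy_features = tf.random.uniform((1, 64, 2048))  # tutorial's extracted feature shape
print(encoder(dummy_features).shape)               # expected: (1, 64, 256)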
class BahdanauAttention(tf.keras.Model):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        C = tf.keras.initializers.Constant
        w1, w2, w3, w4, w5, w6 = [np.load("decoder_layer_weights/layer_%s_%s_weights_%s.npy" % (4, "bahdanau_attention", j))
                                  for j in range(6)]
        self.W1 = tf.keras.layers.Dense(units, kernel_initializer=C(w1), bias_initializer=C(w2))
        self.W2 = tf.keras.layers.Dense(units, kernel_initializer=C(w3), bias_initializer=C(w4))
        self.V = tf.keras.layers.Dense(1, kernel_initializer=C(w5), bias_initializer=C(w6))

    def call(self, features, hidden):
        # features(CNN_encoder output) shape == (batch_size, 64, embedding_dim)
        # hidden shape == (batch_size, hidden_size)
        # hidden_with_time_axis shape == (batch_size, 1, hidden_size)
        hidden_with_time_axis = tf.expand_dims(hidden, 1)
        # score shape == (batch_size, 64, hidden_size)
        score = tf.nn.tanh(self.W1(features) + self.W2(hidden_with_time_axis))
        # attention_weights shape == (batch_size, 64, 1)
        # you get 1 at the last axis because you are applying score to self.V
        attention_weights = tf.nn.softmax(self.V(score), axis=1)
        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = attention_weights * features
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights
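The attention module can be shape-checked the same way. A sketch, assuming the .npy files from the first step exist and the tutorial's units = 512 and embedding_dim = 256:

attention = BahdanauAttention(units=512)
dummy_features = tf.random.uniform((1, 64, 256))  # encoder output shape in the tutorial
dummy_hidden = tf.zeros((1, 512))
context_vector, attention_weights = attention(dummy_features, dummy_hidden)
print(context_vector.shape)     # expected: (1, 256)
print(attention_weights.shape)  # expected: (1, 64, 1)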
class RNN_Decoder(tf.keras.Model):
    def __init__(self, embedding_dim, units, vocab_size):
        super(RNN_Decoder, self).__init__()
        self.units = units
        C = tf.keras.initializers.Constant
        w_emb = np.load("decoder_layer_weights/layer_%s_%s_weights_%s.npy" % (0, "embedding", 0))
        w_gru_1, w_gru_2, w_gru_3 = [np.load("decoder_layer_weights/layer_%s_%s_weights_%s.npy" % (1, "gru", j)) for j in range(3)]
        w1, w2 = [np.load("decoder_layer_weights/layer_%s_%s_weights_%s.npy" % (2, "dense_1", j)) for j in range(2)]
        w3, w4 = [np.load("decoder_layer_weights/layer_%s_%s_weights_%s.npy" % (3, "dense_2", j)) for j in range(2)]

        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim, embeddings_initializer=C(w_emb))
        self.gru = tf.keras.layers.GRU(self.units,
                                       return_sequences=True,
                                       return_state=True,
                                       kernel_initializer=C(w_gru_1),
                                       recurrent_initializer=C(w_gru_2),
                                       bias_initializer=C(w_gru_3))
        self.fc1 = tf.keras.layers.Dense(self.units, kernel_initializer=C(w1), bias_initializer=C(w2))
        self.fc2 = tf.keras.layers.Dense(vocab_size, kernel_initializer=C(w3), bias_initializer=C(w4))
        self.attention = BahdanauAttention(self.units)

    def call(self, x, features, hidden):
        # defining attention as a separate model
        context_vector, attention_weights = self.attention(features, hidden)
        # x shape after passing through embedding == (batch_size, 1, embedding_dim)
        x = self.embedding(x)
        # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        # passing the concatenated vector to the GRU
        output, state = self.gru(x)
        # shape == (batch_size, max_length, hidden_size)
        x = self.fc1(output)
        # x shape == (batch_size * max_length, hidden_size)
        x = tf.reshape(x, (-1, x.shape[2]))
        # output shape == (batch_size * max_length, vocab)
        x = self.fc2(x)
        return x, state, attention_weights

    def reset_state(self, batch_size):
        return tf.zeros((batch_size, self.units))
Finally, you instantiate the Encoder and Decoder classes as usual:
encoder = CNN_Encoder(embedding_dim)
decoder = RNN_Decoder(embedding_dim, units, vocab_size)
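From here, a minimal greedy-decoding loop in the spirit of the tutorial's evaluate() can run inference without any training code. This is only a sketch: tokenizer, max_length, load_image and image_features_extract_model are all assumed to come from the tutorial's Colab.

def caption(image_path):
    hidden = decoder.reset_state(batch_size=1)
    # extract InceptionV3 features exactly as in the tutorial
    temp_input = tf.expand_dims(load_image(image_path)[0], 0)
    img_tensor_val = image_features_extract_model(temp_input)
    img_tensor_val = tf.reshape(img_tensor_val, (img_tensor_val.shape[0], -1, img_tensor_val.shape[3]))
    features = encoder(img_tensor_val)
    dec_input = tf.expand_dims([tokenizer.word_index['<start>']], 0)
    result = []
    for _ in range(max_length):
        predictions, hidden, _ = decoder(dec_input, features, hidden)
        predicted_id = int(tf.argmax(predictions[0]))  # greedy pick instead of sampling
        word = tokenizer.index_word[predicted_id]
        if word == '<end>':
            break
        result.append(word)
        dec_input = tf.expand_dims([predicted_id], 0)
    return ' '.join(result)

print(caption('surf.jpg'))  # hypothetical test image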