I am trying to train an image captioning model with TensorFlow 2.0 and the tf.keras API. The dataset I am using is Flickr 8K, and although my machine can hold the whole dataset in RAM, I still want to use fit_generator together with a data_generator to load and prepare the data in batches (because once I get it working with this dataset, I intend to train the model on a larger one).
Both my data preprocessing and the model definition seem fine. I can run model.predict() on a manually built batch, and the model outputs data of the expected shape with no errors. I can also use the data_generator by hand to prepare the complete dataset and train on it with model.fit(); that works and the model trains correctly.
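For reference, the manual model.fit() run mentioned above looks roughly like this (a sketch of what I do, not the exact code):

import numpy as np

generator = data_generator(train_descriptions, train_features, wordtoix,
                           max_length, number_pics_per_bath)
X1_all, X2_all, y_all = [], [], []
# drain the generator once over the training set and stack the batches
for _ in range(len(train_descriptions) // number_pics_per_bath):
    (x1, x2), y = next(generator)
    X1_all.append(x1)
    X2_all.append(x2)
    y_all.append(y)
model.fit([np.concatenate(X1_all), np.concatenate(X2_all)],
          np.concatenate(y_all), epochs=1, verbose=1)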
The problem appears when I try to train with fit_generator: it fails with this error (the full traceback is at the end of the post):
ValueError: could not broadcast input array from shape (168,2048) into shape (168)
If I call the generator function on its own to check the type and shape of a generated batch, everything looks fine to me:
generator = data_generator(train_descriptions, train_features, wordtoix, max_length, number_pics_per_bath)
data = next(generator)
print("Total items in data: ", len(data))
# Data[1] is the encoded Y
print("Encodded Y shape: ", data[1].shape)
print("Example Y: ", data[1][0])
# Data[0] is a list of [image_feature, encoded_caption]
print("X1 shape (image feature): ", data[0][0].shape)
print("X2 shape (image caption): ", data[0][1].shape)
Outputs:
-----------------------------------
Total items in data: 2
Encoded Y shape: (168, 1652)
Example Y: [0. 0. 1. ... 0. 0. 0.]
X1 shape (image feature): (168, 2048)
X2 shape (image caption): (168, 34)
Here is the code of the data_generator function:
# data generator, intended to be used in a call to model.fit_generator()
# descriptions: a dictionary mapping <image_id> -> [ text_captions_list ]
# photos: a dictionary mapping '<image_id>.jpg' -> numpy array of image features
# wordtoix: a dictionary to convert words to word codes (integers)
# max_length: maximum word count for a caption
def data_generator(descriptions, photos, wordtoix, max_length, num_photos_per_batch):
    X1, X2, y = list(), list(), list()
    n = 0
    # loop forever over images
    while 1:
        for key, desc_list in descriptions.items():
            n += 1
            # retrieve the photo feature
            photo = photos[key + '.jpg']
            for desc in desc_list:
                # encode the sequence
                seq = [wordtoix[word] for word in desc.split(' ') if word in wordtoix]
                # split one sequence into multiple X, y pairs
                for i in range(1, len(seq)):
                    # split into input and output pair
                    in_seq, out_seq = seq[:i], seq[i]
                    # pad input sequence
                    in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
                    # encode output sequence
                    out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
                    # store
                    X1.append(photo)
                    X2.append(in_seq)
                    y.append(out_seq)
            # yield the batch data
            if n == num_photos_per_batch:
                yield [[array(X1), array(X2)], array(y)]
                X1, X2, y = list(), list(), list()
                n = 0
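For completeness, array, pad_sequences and to_categorical come from the usual NumPy/Keras locations (the imports are not shown above, so this is the presumed setup), and vocab_size is a global variable (1652 in my run, per the Y shape shown earlier):

from numpy import array
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical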
And this is how I call fit_generator:
epochs = 20
steps = len(train_descriptions)
for i in range(epochs):
    generator = data_generator(train_descriptions, train_features, wordtoix, max_length, number_pics_per_bath)
    model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
    model.save('./saved/model_' + str(i) + '.h5')
I am using the InceptionV3 model with ImageNet pre-trained weights to generate the image features (which I then save to disk).
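The feature extraction is roughly the following (a simplified sketch rather than my exact preprocessing script; the helper name is just for illustration):

import numpy as np
import tensorflow as tf

# InceptionV3 without its classification head; global average pooling yields a
# 2048-dimensional feature vector per image
feature_extractor = tf.keras.applications.InceptionV3(
    weights='imagenet', include_top=False, pooling='avg')

def extract_feature(image_path):
    img = tf.keras.preprocessing.image.load_img(image_path, target_size=(299, 299))
    x = tf.keras.preprocessing.image.img_to_array(img)
    x = tf.keras.applications.inception_v3.preprocess_input(x)
    return feature_extractor.predict(np.expand_dims(x, axis=0))[0]  # shape (2048,)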
I then fit a model that expects two inputs: the image feature array and the encoded image caption:
inputs1 = tf.keras.Input(shape=(2048,))
fe1 = tf.keras.layers.Dropout(0.5)(inputs1)
fe2 = tf.keras.layers.Dense(256, activation='relu')(fe1)
inputs2 = tf.keras.Input(shape=(max_length,))
se1 = tf.keras.layers.Embedding(vocab_size, embedding_dim, mask_zero=True)(inputs2)
se2 = tf.keras.layers.Dropout(0.5)(se1)
se3 = tf.keras.layers.LSTM(256)(se2)
decoder1 = tf.keras.layers.concatenate([fe2, se3])
decoder2 = tf.keras.layers.Dense(256, activation='relu')(decoder1)
outputs = tf.keras.layers.Dense(vocab_size, activation='softmax')(decoder2)
model = Model(inputs=[inputs1, inputs2], outputs=outputs)
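The compile call is not shown above; it is something along these lines (categorical cross-entropy to match the one-hot encoded targets):

model.compile(loss='categorical_crossentropy', optimizer='adam')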
The full traceback from fit_generator is as follows:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-73-10ea3905954d> in <module>
1 for i in range(epochs):
2 generator = data_generator(train_descriptions, train_features, wordtoix, max_length, number_pics_per_bath)
----> 3 model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
4 model.save('./saved/model_' + str(i) + '.h5')
~/anaconda3/envs/tf2-gpu/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
1295 shuffle=shuffle,
1296 initial_epoch=initial_epoch,
-> 1297 steps_name='steps_per_epoch')
1298
1299 def evaluate_generator(self,
~/anaconda3/envs/tf2-gpu/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_generator.py in model_iteration(model, data, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch, mode, batch_size, steps_name, **kwargs)
263
264 is_deferred = not model._is_compiled
--> 265 batch_outs = batch_function(*batch_data)
266 if not isinstance(batch_outs, list):
267 batch_outs = [batch_outs]
~/anaconda3/envs/tf2-gpu/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py in train_on_batch(self, x, y, sample_weight, class_weight, reset_metrics)
971 outputs = training_v2_utils.train_on_batch(
972 self, x, y=y, sample_weight=sample_weight,
--> 973 class_weight=class_weight, reset_metrics=reset_metrics)
974 outputs = (outputs['total_loss'] + outputs['output_losses'] +
975 outputs['metrics'])
~/anaconda3/envs/tf2-gpu/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in train_on_batch(model, x, y, sample_weight, class_weight, reset_metrics)
251 x, y, sample_weights = model._standardize_user_data(
252 x, y, sample_weight=sample_weight, class_weight=class_weight,
--> 253 extract_tensors_from_dataset=True)
254 batch_size = array_ops.shape(nest.flatten(x, expand_composites=True)[0])[0]
255 # If `model._distribution_strategy` is True, then we are in a replica context
~/anaconda3/envs/tf2-gpu/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, batch_size, check_steps, steps_name, steps, validation_split, shuffle, extract_tensors_from_dataset)
2470 feed_input_shapes,
2471 check_batch_axis=False, # Don't enforce the batch size.
-> 2472 exception_prefix='input')
2473
2474 # Get typespecs for the input data and sanitize it if necessary.
~/anaconda3/envs/tf2-gpu/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
504 elif isinstance(data, (list, tuple)):
505 if isinstance(data[0], (list, tuple)):
--> 506 data = [np.asarray(d) for d in data]
507 elif len(names) == 1 and isinstance(data[0], (float, int)):
508 data = [np.asarray(data)]
~/anaconda3/envs/tf2-gpu/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_utils.py in <listcomp>(.0)
504 elif isinstance(data, (list, tuple)):
505 if isinstance(data[0], (list, tuple)):
--> 506 data = [np.asarray(d) for d in data]
507 elif len(names) == 1 and isinstance(data[0], (float, int)):
508 data = [np.asarray(data)]
~/anaconda3/envs/tf2-gpu/lib/python3.6/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order)
83
84 """
---> 85 return array(a, dtype, copy=False, order=order)
86
87
ValueError: could not broadcast input array from shape (168,2048) into shape (168)
Thanks in advance for your help!
Answer 0 (score: 0)
There should be a return statement for def data_generator.