我正在尝试为图像字幕网络实现Keras模型,并且像这样定义我的模型:
import os
from keras.applications.vgg16 import VGG16
import numpy as np
from numpy import array, argmax, random, take
from pickle import dump
from pickle import load
import string
from keras.applications.imagenet_utils import preprocess_input
from keras.models import Model
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
import tensorflow as tf
from keras.utils import plot_model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding
from keras.layers import Dropout
from keras.layers.merge import add
from keras.callbacks import ModelCheckpoint
from keras.losses import categorical_crossentropy
import matplotlib.pyplot as plt
%matplotlib inline
from nltk.translate.bleu_score import corpus_bleu
def create_sequences(tokenizer, max_length, desc_list, photo):
X1, X2, y = list(), list(), list()
for desc in desc_list:
seq = tokenizer.texts_to_sequences([desc])[0]
print(len(seq))
for i in range(1, len(seq)):
in_seq, out_seq = seq[:i], seq[i]
in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
X1.append(photo)
X2.append(in_seq)
y.append(out_seq)
return array(X1), array(X2), array(y)
def define_model(vocab_size, max_length):
inputs1 = Input(shape=(4096,))
fe1 = Dropout(0.5)(inputs1)
fe2 = Dense(256, activation='relu')(fe1)
inputs2 = Input(shape=(max_length,))
se1 = Embedding(vocab_size, 256, mask_zero=True)(inputs2)
se2 = Dropout(0.5)(se1)
se3 = LSTM(256)(se2)
decoder1 = add([fe2, se3])
decoder2 = Dense(256, activation='relu')(decoder1)
outputs = Dense(vocab_size, activation='softmax')(decoder2)
model = Model(inputs=[inputs1, inputs2], outputs=outputs)
model.compile(loss='categorical_crossentropy', optimizer='adam')
print(model.summary())
return model
def data_generator(descriptions, photos, tokenizer, max_length):
while 1:
for key, desc_list in descriptions.items():
photo = photos[key]
print(photo.shape)
print(len(desc_list))
in_img, in_seq, out_word = create_sequences(tokenizer, max_length, desc_list, photo)
yield [[in_img, in_seq], out_word]
然后运行fit方法:
filepath = 'model-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
model = define_model(vocab_size, max_length)
steps = len(train_descriptions)
generator = data_generator(train_descriptions, train_features, tokenizer, max_length)
val_generator=data_generator(test_descriptions, test_features, tokenizer, max_length)
model.fit(generator, epochs=20, steps_per_epoch=steps, verbose=1,callbacks=[checkpoint],validation_data=val_generator,validation_steps=1)
我得到了错误:
ValueError:没有为任何变量提供渐变:['embedding_1 / embeddings:0','dense_3 / kernel:0','dense_3 / bias:0','lstm_1 / lstm_cell_1 / kernel:0','lstm_1 / lstm_cell_1 / recurrent_kernel:0','lstm_1 / lstm_cell_1 / bias:0','dense_4 / kernel:0','dense_4 / bias:0','dense_5 / kernel:0','dense_5 / bias:0']。
然后尝试不使用生成器的fit方法,得到新错误:
ValueError:密集层24的输入0与该层不兼容:预期输入形状的轴-1的值为4096,但收到形状为[None,1]的输入
有人可以帮助我吗?我不知道错误在哪里,因为我从未见过 我一直在检查SO中的类似帖子,但是那里没有任何解决方案对我有用
谢谢!