我是机器学习的新手,一直在跟着一篇图像描述(image captioning)教程学习,但它需要超过 45GB 的内存才能运行。因此我改用了另一篇教程中介绍的渐进式加载(progressive loading)方法。
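这是我得到的错误:ValueError: Error when checking input: expected input_1 to have 2 dimensions, but got array with shape (13, 224, 224, 3)(检查输入时出错:期望 input_1 有 2 个维度,但得到的数组形状为 (13, 224, 224, 3))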
这里是模型函数
# define the captioning model
def define_model(vocab_size, max_length):
    """Build and compile the two-input caption generation model.

    The model merges two branches by element-wise addition:

    * an image branch whose input is a 4096-element vector per sample, so
      the data fed to it must be shaped ``(batch, 4096)`` -- a batch of raw
      ``(224, 224, 3)`` pixel arrays does not fit this input;
    * a sequence branch whose input is ``max_length`` integer word indices
      per sample (index 0 is masked by the embedding layer).

    The output is a softmax over ``vocab_size`` classes.

    Side effects: prints the layer summary and writes a diagram of the
    model to ``model.png``.

    Args:
        vocab_size: Number of classes of the softmax output; also the input
            dimension of the embedding layer.
        max_length: Length of the word-index sequence fed to the sequence
            branch.

    Returns:
        The model, compiled with categorical cross-entropy loss and the
        Adam optimizer.
    """
    # feature extractor model
    inputs1 = Input(shape=(4096,))
    fe1 = Dropout(0.5)(inputs1)
    fe2 = Dense(256, activation='relu')(fe1)
    # sequence model
    inputs2 = Input(shape=(max_length,))
    se1 = Embedding(vocab_size, 256, mask_zero=True)(inputs2)
    se2 = Dropout(0.5)(se1)
    se3 = LSTM(256)(se2)
    # decoder model
    decoder1 = add([fe2, se3])
    decoder2 = Dense(256, activation='relu')(decoder1)
    outputs = Dense(vocab_size, activation='softmax')(decoder2)
    # tie it together [image, seq] [word]
    model = Model(inputs=[inputs1, inputs2], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    # summarize model; summary() prints the table itself and returns nothing,
    # so wrapping it in print() only appended a stray "None" line
    model.summary()
    plot_model(model, to_file='model.png', show_shapes=True)
    return model
load_photo 函数
# load a single photo intended as input for the VGG feature extractor model
def load_photo(filename):
    """Load one photo, preprocess it, and derive its identifier.

    The file is loaded at 224x224, converted to a pixel array and passed
    through ``preprocess_input``.

    Args:
        filename: Path of the image file, using '/' as separator.

    Returns:
        A ``(image, image_id)`` tuple: the preprocessed pixel array without
        a batch axis, and the part of the file's base name that precedes
        its first '.'.
    """
    pixels = img_to_array(load_img(filename, target_size=(224, 224)))
    # preprocess_input is applied to a one-image batch; the batch axis is
    # dropped again straight afterwards
    height, width, channels = pixels.shape
    batch = pixels.reshape((1, height, width, channels))
    prepared = preprocess_input(batch)[0]
    # base name up to (not including) the first dot
    base_name = filename.rsplit('/', 1)[-1]
    photo_id = base_name.partition('.')[0]
    return prepared, photo_id
create_sequences和data_generator函数
# create sequences of images, input sequences and output words for an image
def create_sequences(tokenizer, max_length, desc, image):
    """Expand one description into (image, word prefix, next word) samples.

    The description is integer-encoded with ``tokenizer``. Every position
    after the first yields one sample: the words before that position,
    padded to ``max_length``, are the input and the word at that position,
    one-hot encoded, is the target. ``image`` is repeated for every sample.

    Args:
        tokenizer: Fitted tokenizer providing ``word_index`` and
            ``texts_to_sequences``.
        max_length: Length every input sequence is padded to.
        desc: The description text for the photo.
        image: The photo data to pair with each sample.

    Returns:
        A list ``[Ximages, XSeq, y]`` of three arrays with one row per
        sample.
    """
    # one class per known word, plus one for index 0
    n_classes = len(tokenizer.word_index) + 1
    encoded = tokenizer.texts_to_sequences([desc])[0]
    photos, prefixes, targets = [], [], []
    # `cut` is the number of words in the prefix; `next_word` is the word
    # the model should predict after that prefix
    for cut, next_word in enumerate(encoded[1:], start=1):
        prefixes.append(pad_sequences([encoded[:cut]], maxlen=max_length)[0])
        targets.append(to_categorical([next_word], num_classes=n_classes)[0])
        photos.append(image)
    return [array(photos), array(prefixes), array(targets)]
# data generator, intended to be used in a call to model.fit_generator()
def data_generator(descriptions, tokenizer, max_length, feature_extractor=None):
    """Endlessly yield one training batch per photo in 'Flicker8k_Dataset'.

    For each file in the directory the photo is loaded with ``load_photo``
    and expanded with ``create_sequences``; all samples derived from one
    photo form one batch.

    Files whose identifier has no entry in ``descriptions`` are skipped
    instead of raising ``KeyError``, so the directory may contain more
    photos than the description set covers.

    Args:
        descriptions: Mapping from image identifier to its description.
        tokenizer: Fitted tokenizer, passed on to ``create_sequences``.
        max_length: Padded length of the word sequences.
        feature_extractor: Optional object with a Keras-style
            ``predict(batch, verbose=0)`` method. When given, each photo is
            replaced by the first row of its prediction before the samples
            are built, so the image input of the batch holds feature
            vectors instead of raw pixels. With the default ``None`` the
            preprocessed pixel array is passed through unchanged, which
            only suits a model whose image input takes that array's shape.
            NOTE(review): predict() then runs wherever the generator is
            consumed -- confirm this is safe with the backend in use.

    Yields:
        ``[[in_img, in_seq], out_word]`` for one photo at a time.

    Raises:
        ValueError: If a full pass over the directory finds no photo with a
            description (previously this would loop forever or fail with
            ``KeyError``).
    """
    # loop for ever over images
    directory = 'Flicker8k_Dataset'
    while 1:
        produced_batch = False
        for name in listdir(directory):
            # same identifier load_photo derives from the path; checked
            # first so photos without a description are never loaded
            if name.split('.')[0] not in descriptions:
                continue
            # load an image from file; '/' is kept as separator because
            # load_photo splits the path on it to find the identifier
            filename = directory + '/' + name
            image, image_id = load_photo(filename)
            if feature_extractor is not None:
                # predict() works on batches: add a batch axis, then take
                # the single resulting row back out
                batch = image.reshape((1,) + image.shape)
                image = feature_extractor.predict(batch, verbose=0)[0]
            # create word sequences
            desc = descriptions[image_id]
            in_img, in_seq, out_word = create_sequences(tokenizer, max_length, desc, image)
            produced_batch = True
            yield [[in_img, in_seq], out_word]
        if not produced_batch:
            raise ValueError(
                'no photo in %r has an entry in descriptions' % directory)
最后
model = define_model(vocab_size, max_length)
# define checkpoint callback
# No validation data is passed to fit_generator() below, so only the training
# loss exists at the end of an epoch: the file name and the monitored quantity
# must not refer to val_loss.
filepath = 'model-ep{epoch:03d}-loss{loss:.3f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
# fit model; the checkpoint has to be handed over via `callbacks`, otherwise
# nothing is ever saved
# NOTE(review): data_generator yields one batch per photo, so steps_per_epoch
# counts photo batches -- confirm 70000 is the intended epoch length
model.fit_generator(
    data_generator(descriptions, tokenizer, max_length),
    steps_per_epoch=70000,
    callbacks=[checkpoint],
)
答案 0 :(得分:0)
该错误表示:输入数据是 13 张形状为 (224, 224, 3) 的图像(即一个形状为 (13, 224, 224, 3) 的四维数组),而特征提取器模型的输入层只接受长度为 4096 的向量(即形状为 (批大小, 4096) 的二维数组)。
要解决此问题,首先必须在 load_photo 函数中将图像重新整形为一维向量:
def load_photo(filename):
image = image.reshape((1, 224*224*3))
然后在 define_model 函数中相应地修改输入层的形状:
def define_model(vocab_size, max_length):
inputs1 = Input(shape=(224*224*3, ))