我正在尝试为可视问题生成模型运行代码(找到here)。我在Linux的Anaconda虚拟环境中使用Linux的Windows子系统运行代码。我正在使用Tensorflow v1.3.0,因为我在使用Tensorflow的最新版本时遇到了问题-存储库相对较旧。
我收到以下错误(包括完整的追溯):
Traceback (most recent call last):
File "main.py", line 70, in <module>
tf.app.run()
File "/home/username/anaconda2/envs/py27/lib/python2.7/site-packages/tensorflow/python/platform/app.py", line 48, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough))
File "main.py", line 64, in main
model.train()
File "/home/username/VQG-tensorflow/question_generator.py", line 124, in train
feats = self.img_feature[img_list,:]
IndexError: index 82459 is out of bounds for axis 0 with size 82459
我在下面包含了main.py和question_generator.py的源代码。显然,程序正在尝试访问不存在的索引。我不知道是什么使它表现为这种方式。与此类似的问题(例如this和this)没有帮助。我尝试使用numpy.pad方法填充数组,但这只会导致另一个不同的相关错误:
ValueError: Cannot feed value of shape (256, 4097) for Tensor u'Placeholder:0', which has shape '(256, 4096)'
任何帮助都将不胜感激!
main.py的源代码:
#-*- coding: utf-8 -*-
import math
import os
import tensorflow as tf
import numpy as np
import cPickle
import skimage
import pprint
import tensorflow.python.platform
from keras.preprocessing import sequence
from data_loader import *
import vgg19
import question_generator
flags = tf.app.flags
pp = pprint.PrettyPrinter().pprint
tf.app.flags.DEFINE_string('input_img_h5', './data_img.h5', 'path to the h5file containing the image feature')
tf.app.flags.DEFINE_string('input_ques_h5', './data_prepro.h5', 'path to the h5file containing the preprocessed dataset')
tf.app.flags.DEFINE_string('input_json', './data_prepro.json', 'path to the json file containing additional info and vocab')
tf.app.flags.DEFINE_string('model_path', './models/', 'where should we save')
tf.app.flags.DEFINE_string('vgg_path', './vgg16.tfmodel', 'momentum for adam')
tf.app.flags.DEFINE_string('gpu_fraction', '2/3', 'define the gpu fraction used')
tf.app.flags.DEFINE_string('test_image_path', './assets/demo.jpg', 'the image you want to generate question')
tf.app.flags.DEFINE_string('test_model_path', './models/model-250', 'model we saved')
tf.app.flags.DEFINE_integer('batch_size', 256, 'tch_size for each iterations')
tf.app.flags.DEFINE_integer('dim_embed', 512, 'word embedding size')
tf.app.flags.DEFINE_integer('dim_hidden', 512, 'hidden size')
tf.app.flags.DEFINE_integer('dim_image', 4096, 'dimension of output from fc7')
tf.app.flags.DEFINE_integer('img_norm', 1, 'do normalization on image or not')
tf.app.flags.DEFINE_integer('maxlen', 26, 'max length of question')
tf.app.flags.DEFINE_integer('n_epochs', 250, 'how many epochs are we going to train')
tf.app.flags.DEFINE_float('learning_rate', '0.001', 'learning rate for adam')
tf.app.flags.DEFINE_float('momentum', 0.9, 'momentum for adam')
tf.app.flags.DEFINE_boolean('is_train', 'True', 'momentum for adam')
conf = flags.FLAGS
def calc_gpu_fraction(fraction_string):
idx, num = fraction_string.split('/')
idx, num = float(idx), float(num)
fraction = 1 / (num - idx + 1)
print " [*] GPU : %.4f" % fraction
return fraction
def main(_):
attrs = conf.__dict__['__flags']
pp(attrs)
dataset, img_feature, train_data = get_data(conf.input_json, conf.input_img_h5, conf.input_ques_h5, conf.img_norm)
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=calc_gpu_fraction(conf.gpu_fraction))
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
model = question_generator.Question_Generator(sess, conf, dataset, img_feature, train_data)
if conf.is_train:
model.build_model()
model.train()
else:
model.build_generator()
model.test(test_image_path=conf.test_image_path, model_path=conf.test_model_path, maxlen=26)
if __name__ == '__main__':
tf.app.run()
question_generation.py的源代码:
import os
import tensorflow as tf
import numpy as np
import tensorflow.python.platform
from keras.preprocessing import sequence
from data_loader import *
import vgg19
tf.pack = tf.stack
tf.select = tf.where
tf.batch_matmul = tf.matmul
class Question_Generator():
def __init__(self, sess, conf, dataset, img_feature, train_data):
self.sess = sess
self.dataset = dataset
self.img_feature = img_feature
self.train_data = train_data
self.dim_image = conf.dim_image
self.dim_embed = conf.dim_embed
self.dim_hidden = conf.dim_hidden
self.batch_size = conf.batch_size
self.maxlen = conf.maxlen
self.n_lstm_steps = conf.maxlen+2
self.model_path = conf.model_path
if conf.is_train:
self.n_epochs = conf.n_epochs
self.learning_rate = conf.learning_rate
self.num_train = train_data['question'].shape[0] # total number of data
self.n_words = len(dataset['ix_to_word'].keys()) # vocabulary_size
# word embedding
self.Wemb = tf.Variable(tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1), name='Wemb')
self.bemb = tf.Variable(tf.random_uniform([self.dim_embed], -0.1, 0.1), name='bemb')
# LSTM
self.lstm = tf.contrib.rnn.BasicLSTMCell(self.dim_hidden)
#self.lstm = tf.nn.rnn_cell.BasicLSTMCell(self.dim_hidden)
# fc7 encoder
self.encode_img_W = tf.Variable(tf.random_uniform([self.dim_image, self.dim_hidden], -0.1, 0.1), name='encode_img_W')
self.encode_img_b = tf.Variable(tf.random_uniform([self.dim_hidden], -0.1, 0.1), name='encode_img_b')
# feat -> word
self.embed_word_W = tf.Variable(tf.random_uniform([self.dim_hidden, self.n_words], -0.1, 0.1), name='embed_word_W')
self.embed_word_b = tf.Variable(tf.random_uniform([self.n_words], -0.1, 0.1), name='embed_word_b')
def build_model(self):
self.image = tf.placeholder(tf.float32, [self.batch_size, self.dim_image])
self.question = tf.placeholder(tf.int32, [self.batch_size, self.n_lstm_steps])
self.mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])
image_emb = tf.nn.xw_plus_b(self.image, self.encode_img_W, self.encode_img_b) # (batch_size, dim_hidden)
state = self.lstm.zero_state(self.batch_size,tf.float32)
loss = 0.0
with tf.variable_scope("RNN"):
for i in range(self.n_lstm_steps):
if i == 0:
current_emb = image_emb
else:
tf.get_variable_scope().reuse_variables()
current_emb = tf.nn.embedding_lookup(self.Wemb, self.question[:,i-1]) + self.bemb
# LSTM
output, state = self.lstm(current_emb, state)
if i > 0:
# ground truth
labels = tf.expand_dims(self.question[:, i], 1)
indices = tf.expand_dims(tf.range(0, self.batch_size, 1), 1)
concated = tf.concat([indices, labels], 1)
#concated = tf.concat(1, [indices, labels])
onehot_labels = tf.sparse_to_dense(
concated, tf.pack([self.batch_size, self.n_words]), 1.0, 0.0)
# predict word
logit_words = tf.nn.xw_plus_b(output, self.embed_word_W, self.embed_word_b)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logit_words, labels=onehot_labels)
cross_entropy = cross_entropy * self.mask[:,i]
current_loss = tf.reduce_sum(cross_entropy)
loss = loss + current_loss
self.loss = loss / tf.reduce_sum(self.mask[:,1:])
def build_generator(self):
self.image = tf.placeholder(tf.float32, [1, self.dim_image]) # only one image
image_emb = tf.nn.xw_plus_b(self.image, self.encode_img_W, self.encode_img_b)
state = tf.zeros([1, self.lstm.state_size])
self.generated_words = []
with tf.variable_scope("RNN"):
output, state = self.lstm(image_emb, state)
last_word = tf.nn.embedding_lookup(self.Wemb, [0]) + self.bemb
for i in range(self.maxlen):
tf.get_variable_scope().reuse_variables()
output, state = self.lstm(last_word, state)
logit_words = tf.nn.xw_plus_b(output, self.embed_word_W, self.embed_word_b)
max_prob_word = tf.argmax(logit_words, 1)
last_word = tf.nn.embedding_lookup(self.Wemb, max_prob_word)
last_word += self.bemb
self.generated_words.append(max_prob_word)
def train(self):
index = np.arange(self.num_train)
np.random.shuffle(index)
questions = self.train_data['question'][index,:]
img_list = self.train_data['img_list'][index]
print("img feature length: " + str(len(self.img_feature)))
print("img list: " + str(img_list))
#self.img_feature = np.pad(self.img_feature, (0,1),'constant', constant_values=(0,0)) #pad array to prevent bug
print("img feature length: " + str(len(self.img_feature)))
feats = self.img_feature[img_list,:]
self.saver = tf.train.Saver(max_to_keep=50)
train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
tf.initialize_all_variables().run()
for epoch in range(self.n_epochs):
counter = 0
for start, end in zip( \
range(0, len(feats), self.batch_size),
range(self.batch_size, len(feats), self.batch_size)
):
current_feats = feats[start:end]
current_questions = questions[start:end]
current_question_matrix = sequence.pad_sequences(current_questions, padding='post', maxlen=self.maxlen+1)
current_question_matrix = np.hstack( [np.full( (len(current_question_matrix),1), 0), current_question_matrix] ).astype(int)
current_mask_matrix = np.zeros((current_question_matrix.shape[0], current_question_matrix.shape[1]))
nonzeros = np.array( map(lambda x: (x != 0).sum()+2, current_question_matrix ))
# +2 -> #START# and '.'
for ind, row in enumerate(current_mask_matrix):
row[:nonzeros[ind]] = 1
_, loss_value = self.sess.run([train_op, self.loss], feed_dict={
self.image: current_feats,
self.question : current_question_matrix,
self.mask : current_mask_matrix
})
if np.mod(counter, 100) == 0:
print "Epoch: ", epoch, " batch: ", counter ," Current Cost: ", loss_value
counter = counter + 1
if np.mod(epoch, 25) == 0:
print "Epoch ", epoch, " is done. Saving the model ... "
self.save_model(epoch)
def test(self, test_image_path, model_path, maxlen):
ixtoword = self.dataset['ix_to_word']
images = tf.placeholder("float32", [1, 224, 224, 3])
image_val = read_image(test_image_path)
vgg = vgg19.Vgg19()
with tf.name_scope("content_vgg"):
vgg.build(images)
fc7 = self.sess.run(vgg.relu7, feed_dict={images:image_val})
saver = tf.train.Saver()
saver.restore(self.sess, model_path)
generated_word_index = self.sess.run(self.generated_words, feed_dict={self.image:fc7})
generated_word_index = np.hstack(generated_word_index)
generated_sentence = ''
for x in generated_word_index:
if x==0:
break
word = ixtoword[str(x)]
generated_sentence = generated_sentence + ' ' + word
print ' '
print '--------------------------------------------------------------------------------------------------------'
print generated_sentence
def save_model(self, epoch):
if not os.path.exists(self.model_path):
os.makedirs(self.model_path)
self.saver.save(self.sess, os.path.join(self.model_path, 'model'), global_step=epoch)
答案 0 :(得分:1)
这是一个非常基本的问题。当您运行此代码时,您不了解的是数组(Python中的列表)是0索引的。如果您有一个长度为n的列表,那么当您尝试访问该列表中的第n个元素时,将收到索引错误。