Question

我正在尝试运行一个视觉问题生成（Visual Question Generation）模型的代码（代码仓库见此处）。我在适用于 Linux 的 Windows 子系统（WSL）中的 Anaconda 虚拟环境里运行这些代码。我使用的是 TensorFlow v1.3.0，因为使用更新版本的 TensorFlow 时遇到了问题——该代码仓库相对较旧。

我收到以下错误（包括完整的追溯）：

Traceback (most recent call last):                                                                                                                                                                    
   File "main.py", line 70, in <module>                                                                                                                                                                  
     tf.app.run()                                                                                                                                                                                      
   File "/home/username/anaconda2/envs/py27/lib/python2.7/site-packages/tensorflow/python/platform/app.py", line 48, in run                                                                           
     _sys.exit(main(_sys.argv[:1] + flags_passthrough))                                                                                                                                                
   File "main.py", line 64, in main                                                                                                                                                                      
      model.train()                                                                                                                                                                                     
   File "/home/username/VQG-tensorflow/question_generator.py", line 124, in train                                                                                                                     
      feats = self.img_feature[img_list,:]                                                                                                                                                            
 IndexError: index 82459 is out of bounds for axis 0 with size 82459

我在下面附上了main.py和question_generator.py的源代码。显然，程序正在尝试访问一个不存在的索引，但我不知道是什么原因导致了这种行为。与此类似的问题（例如这个和这个）并没有帮助。我尝试使用numpy.pad方法填充数组，但这只会导致另一个相关的错误：

ValueError: Cannot feed value of shape (256, 4097) for Tensor u'Placeholder:0', which has shape '(256, 4096)'

任何帮助都将不胜感激！

main.py的源代码：

#-*- coding: utf-8 -*-
import math
import os
import tensorflow as tf
import numpy as np
import cPickle
import skimage
import pprint
import tensorflow.python.platform
from keras.preprocessing import sequence
from data_loader import *
import vgg19
import question_generator

flags = tf.app.flags
pp = pprint.PrettyPrinter().pprint

# File locations.
flags.DEFINE_string('input_img_h5', './data_img.h5', 'path to the h5file containing the image feature')
flags.DEFINE_string('input_ques_h5', './data_prepro.h5', 'path to the h5file containing the preprocessed dataset')
flags.DEFINE_string('input_json', './data_prepro.json', 'path to the json file containing additional info and vocab')
flags.DEFINE_string('model_path', './models/', 'where should we save')
# The help text used to be a copy-pasted 'momentum for adam'.
flags.DEFINE_string('vgg_path', './vgg16.tfmodel', 'path to the pretrained vgg model file')
# Parsed by calc_gpu_fraction(), which expects the form 'idx/num'.
flags.DEFINE_string('gpu_fraction', '2/3', 'define the gpu fraction used')
flags.DEFINE_string('test_image_path', './assets/demo.jpg', 'the image you want to generate question')
flags.DEFINE_string('test_model_path', './models/model-250', 'model we saved')

# Model and training hyper-parameters.
flags.DEFINE_integer('batch_size', 256, 'batch size for each iteration')
flags.DEFINE_integer('dim_embed', 512, 'word embedding size')
flags.DEFINE_integer('dim_hidden', 512, 'hidden size')
flags.DEFINE_integer('dim_image', 4096, 'dimension of output from fc7')
flags.DEFINE_integer('img_norm', 1, 'do normalization on image or not')
flags.DEFINE_integer('maxlen', 26, 'max length of question')
flags.DEFINE_integer('n_epochs', 250, 'how many epochs are we going to train')
# Defaults are real float/bool literals instead of the strings '0.001' / 'True',
# so the value does not depend on the flag parser converting string defaults.
flags.DEFINE_float('learning_rate', 0.001, 'learning rate for adam')
flags.DEFINE_float('momentum', 0.9, 'momentum for adam')
flags.DEFINE_boolean('is_train', True, 'train the model if True, otherwise generate a question for test_image_path')

conf = flags.FLAGS

def calc_gpu_fraction(fraction_string):
    """Convert an 'idx/num' string into a GPU memory fraction.

    Args:
        fraction_string: two numbers separated by a single '/', e.g. '2/3'.

    Returns:
        The float ``1 / (num - idx + 1)``; for example '2/3' gives 0.5.

    Raises:
        ValueError: if the string does not split into exactly two numbers.
        ZeroDivisionError: if ``num - idx + 1`` equals zero.
    """
    idx, num = (float(part) for part in fraction_string.split('/'))

    fraction = 1 / (num - idx + 1)
    # Single-argument print(...) emits the same text on Python 2 and Python 3.
    print(" [*] GPU : %.4f" % fraction)
    return fraction

def main(_):
    """Load the data, then either train the model or generate one question.

    The positional argument (supplied by ``tf.app.run``) is ignored; all
    settings are read from the module-level ``conf`` flags object.
    """
    attrs = conf.__dict__['__flags']
    pp(attrs)

    dataset, img_feature, train_data = get_data(
        conf.input_json, conf.input_img_h5, conf.input_ques_h5, conf.img_norm)

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=calc_gpu_fraction(conf.gpu_fraction))

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        model = question_generator.Question_Generator(
            sess, conf, dataset, img_feature, train_data)

        # Both branches must run inside the session block: the model keeps a
        # reference to `sess` and uses it for training and inference.
        if conf.is_train:
            model.build_model()
            model.train()
        else:
            model.build_generator()
            # Use the maxlen flag (default 26) rather than a hard-coded 26.
            model.test(test_image_path=conf.test_image_path,
                       model_path=conf.test_model_path,
                       maxlen=conf.maxlen)

# Script entry point: only start the TensorFlow app runner when this file is
# executed directly (the traceback above shows tf.app.run() calling main()).
if __name__ == '__main__':
    tf.app.run()

question_generator.py的源代码：

import os
import tensorflow as tf
import numpy as np
import tensorflow.python.platform
from keras.preprocessing import sequence
from data_loader import *
import vgg19

# Compatibility aliases: make the names tf.pack, tf.select and tf.batch_matmul
# available by binding them to tf.stack, tf.where and tf.matmul. Code below
# (e.g. build_model) calls tf.pack; presumably these are older TensorFlow
# names that this relatively old repository still relies on.
tf.pack = tf.stack
tf.select = tf.where
tf.batch_matmul = tf.matmul



class Question_Generator():
    """LSTM model that generates a question from an image feature vector.

    The image feature is projected to the LSTM input size and fed as the
    first LSTM step; the following steps consume word embeddings and predict
    the next word of the question.
    """

    def __init__(self, sess, conf, dataset, img_feature, train_data):
        """Store the configuration and create the trainable variables.

        Args:
            sess: TensorFlow session used for training and inference.
            conf: flags object providing dim_image, dim_embed, dim_hidden,
                batch_size, maxlen, model_path and is_train (plus n_epochs
                and learning_rate when is_train is true).
            dataset: mapping with an 'ix_to_word' vocabulary dictionary.
            img_feature: array of image features, indexed by image position
                along axis 0.
            train_data: mapping with 'question' and 'img_list' arrays.
        """
        self.sess = sess
        self.dataset = dataset
        self.img_feature = img_feature
        self.train_data = train_data
        self.dim_image = conf.dim_image
        self.dim_embed = conf.dim_embed
        self.dim_hidden = conf.dim_hidden
        self.batch_size = conf.batch_size
        self.maxlen = conf.maxlen
        # One step for the image plus maxlen + 1 word positions.
        self.n_lstm_steps = conf.maxlen + 2
        self.model_path = conf.model_path
        if conf.is_train:
            self.n_epochs = conf.n_epochs
            self.learning_rate = conf.learning_rate

        self.num_train = train_data['question'].shape[0]  # total number of data
        self.n_words = len(dataset['ix_to_word'])  # vocabulary_size

        # word embedding
        self.Wemb = tf.Variable(tf.random_uniform([self.n_words, self.dim_embed], -0.1, 0.1), name='Wemb')
        self.bemb = tf.Variable(tf.random_uniform([self.dim_embed], -0.1, 0.1), name='bemb')

        # LSTM
        self.lstm = tf.contrib.rnn.BasicLSTMCell(self.dim_hidden)

        # fc7 encoder
        self.encode_img_W = tf.Variable(tf.random_uniform([self.dim_image, self.dim_hidden], -0.1, 0.1), name='encode_img_W')
        self.encode_img_b = tf.Variable(tf.random_uniform([self.dim_hidden], -0.1, 0.1), name='encode_img_b')

        # feat -> word
        self.embed_word_W = tf.Variable(tf.random_uniform([self.dim_hidden, self.n_words], -0.1, 0.1), name='embed_word_W')
        self.embed_word_b = tf.Variable(tf.random_uniform([self.n_words], -0.1, 0.1), name='embed_word_b')

    def build_model(self):
        """Build the training graph.

        Creates the placeholders ``self.image``, ``self.question`` and
        ``self.mask`` and the scalar ``self.loss``: the masked cross-entropy
        summed over all word steps, divided by the sum of ``mask[:, 1:]``.
        """
        self.image = tf.placeholder(tf.float32, [self.batch_size, self.dim_image])
        self.question = tf.placeholder(tf.int32, [self.batch_size, self.n_lstm_steps])
        self.mask = tf.placeholder(tf.float32, [self.batch_size, self.n_lstm_steps])

        image_emb = tf.nn.xw_plus_b(self.image, self.encode_img_W, self.encode_img_b)  # (batch_size, dim_hidden)

        state = self.lstm.zero_state(self.batch_size, tf.float32)
        loss = 0.0
        with tf.variable_scope("RNN"):
            for i in range(self.n_lstm_steps):
                if i == 0:
                    # The first step sees the encoded image.
                    current_emb = image_emb
                else:
                    # Later steps share the LSTM weights created at step 0.
                    tf.get_variable_scope().reuse_variables()
                    current_emb = tf.nn.embedding_lookup(self.Wemb, self.question[:, i-1]) + self.bemb

                # LSTM
                output, state = self.lstm(current_emb, state)

                if i > 0:
                    # ground truth as a one-hot (batch_size, n_words) matrix
                    labels = tf.expand_dims(self.question[:, i], 1)
                    indices = tf.expand_dims(tf.range(0, self.batch_size, 1), 1)
                    concated = tf.concat([indices, labels], 1)
                    onehot_labels = tf.sparse_to_dense(
                        concated, tf.stack([self.batch_size, self.n_words]), 1.0, 0.0)

                    # predict word
                    logit_words = tf.nn.xw_plus_b(output, self.embed_word_W, self.embed_word_b)

                    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logit_words, labels=onehot_labels)
                    # Ignore positions beyond the end of each question.
                    cross_entropy = cross_entropy * self.mask[:, i]

                    current_loss = tf.reduce_sum(cross_entropy)
                    loss = loss + current_loss

            self.loss = loss / tf.reduce_sum(self.mask[:, 1:])

    def build_generator(self):
        """Build the inference graph for a single image.

        Creates the placeholder ``self.image`` (one image feature) and
        ``self.generated_words``, a list of ``self.maxlen`` tensors holding
        the arg-max word index chosen at each step.
        """
        self.image = tf.placeholder(tf.float32, [1, self.dim_image])  # only one image
        image_emb = tf.nn.xw_plus_b(self.image, self.encode_img_W, self.encode_img_b)

        # Ask the cell for its own zero state (as build_model does) instead of
        # tf.zeros([1, state_size]), which assumes state_size is a plain
        # integer rather than a state tuple.
        state = self.lstm.zero_state(1, tf.float32)
        self.generated_words = []

        with tf.variable_scope("RNN"):
            output, state = self.lstm(image_emb, state)
            # Word index 0 is used as the start token.
            last_word = tf.nn.embedding_lookup(self.Wemb, [0]) + self.bemb

            for i in range(self.maxlen):
                tf.get_variable_scope().reuse_variables()

                output, state = self.lstm(last_word, state)

                logit_words = tf.nn.xw_plus_b(output, self.embed_word_W, self.embed_word_b)
                max_prob_word = tf.argmax(logit_words, 1)

                # Feed the predicted word back in as the next input.
                last_word = tf.nn.embedding_lookup(self.Wemb, max_prob_word)
                last_word += self.bemb

                self.generated_words.append(max_prob_word)

    def train(self):
        """Train the model and save a checkpoint every 25 epochs.

        Requires ``build_model()`` to have been called and a default session
        to be active.

        Raises:
            IndexError: if ``train_data['img_list']`` refers to a row that
                does not exist in ``img_feature``.
        """
        index = np.arange(self.num_train)
        np.random.shuffle(index)
        questions = self.train_data['question'][index, :]
        img_list = self.train_data['img_list'][index]

        n_feats = len(self.img_feature)
        print("img feature length: " + str(n_feats))
        print("img list: " + str(img_list))
        # Fail with an actionable message instead of a bare numpy IndexError.
        # NOTE(review): an index equal to n_feats suggests the stored image
        # positions are 1-based; confirm against the data loader and convert
        # them to 0-based there rather than padding img_feature.
        if len(img_list) > 0 and np.max(img_list) >= n_feats:
            raise IndexError(
                "img_list contains index %d but img_feature only has %d rows "
                "(valid indices are 0..%d); the image positions may be "
                "1-based and need converting to 0-based when the data is "
                "loaded" % (np.max(img_list), n_feats, n_feats - 1))
        feats = self.img_feature[img_list, :]

        self.saver = tf.train.Saver(max_to_keep=50)
        train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
        tf.global_variables_initializer().run()

        n_samples = len(feats)
        for epoch in range(self.n_epochs):
            counter = 0

            # Only full batches are used because the placeholders have a fixed
            # batch dimension. The "+ 1" keeps the final full batch when
            # n_samples is an exact multiple of batch_size.
            for start in range(0, n_samples - self.batch_size + 1, self.batch_size):
                end = start + self.batch_size

                current_feats = feats[start:end]
                current_questions = questions[start:end]

                # Pad to maxlen + 1 and prepend a column of zeros (start token).
                padded = sequence.pad_sequences(current_questions, padding='post', maxlen=self.maxlen+1)
                start_column = np.zeros((len(padded), 1), dtype=int)
                current_question_matrix = np.hstack([start_column, padded]).astype(int)

                # Mask is 1 for the first (number of non-zero words + 2)
                # positions of each row.  +2 -> #START# and '.'
                lengths = (current_question_matrix != 0).sum(axis=1) + 2
                positions = np.arange(current_question_matrix.shape[1])
                current_mask_matrix = (
                    positions[np.newaxis, :] < lengths[:, np.newaxis]
                ).astype(np.float64)

                _, loss_value = self.sess.run([train_op, self.loss], feed_dict={
                    self.image: current_feats,
                    self.question: current_question_matrix,
                    self.mask: current_mask_matrix
                    })

                if np.mod(counter, 100) == 0:
                    print("Epoch:  %s  batch:  %s  Current Cost:  %s" % (epoch, counter, loss_value))
                counter = counter + 1

            if np.mod(epoch, 25) == 0:
                print("Epoch  %s  is done. Saving the model ... " % epoch)
                self.save_model(epoch)

    def test(self, test_image_path, model_path, maxlen):
        """Generate and print a question for one image.

        Requires ``build_generator()`` to have been called.

        Args:
            test_image_path: path of the image, passed to ``read_image``.
            model_path: checkpoint to restore before generating.
            maxlen: accepted for interface compatibility; not used here (the
                number of generated words is fixed by ``build_generator``).
        """
        ixtoword = self.dataset['ix_to_word']

        images = tf.placeholder("float32", [1, 224, 224, 3])

        image_val = read_image(test_image_path)

        vgg = vgg19.Vgg19()
        with tf.name_scope("content_vgg"):
            vgg.build(images)

        # Image feature fed to the generator, taken from the VGG relu7 output.
        fc7 = self.sess.run(vgg.relu7, feed_dict={images: image_val})

        saver = tf.train.Saver()
        saver.restore(self.sess, model_path)

        generated_word_index = self.sess.run(self.generated_words, feed_dict={self.image: fc7})
        generated_word_index = np.hstack(generated_word_index)

        words = []
        for x in generated_word_index:
            # Word index 0 ends the sentence.
            if x == 0:
                break
            words.append(ixtoword[str(x)])
        generated_sentence = ''.join(' ' + word for word in words)

        print(' ')
        print('--------------------------------------------------------------------------------------------------------')
        print(generated_sentence)

    def save_model(self, epoch):
        """Save a checkpoint named 'model-<epoch>' under ``self.model_path``.

        The directory is created if it does not exist. ``self.saver`` must
        already exist (it is created in ``train``).
        """
        if not os.path.exists(self.model_path):
            os.makedirs(self.model_path)
        self.saver.save(self.sess, os.path.join(self.model_path, 'model'), global_step=epoch)

Answer 1

这是一个非常基本的问题。您在运行这段代码时没有注意到的是：数组（即Python中的列表）的索引是从0开始的。如果您有一个长度为n的列表，其有效索引范围是0到n-1；当您尝试访问索引为n的元素时，就会收到索引错误。

IndexError：索引82459超出了轴0的范围，其大小为82459

1 个答案: