Neural machine translation: IndexError: dimension specified as 0 but tensor has no dimensions

Time: 2020-07-24 03:38:29

Tags: python tensorflow machine-learning neural-network tensor

I am training on my dataset, but when I call the training method I get an error saying my tensor has no dimensions. Can anyone tell me how to fix this and how to determine whether a tensor has dimensions? The code for the method where the exception occurs, and the method that calls it, are shown below.
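For reference, a tensor's dimensionality can be checked with Tensor.dim() or Tensor.shape; a minimal standalone check (not part of the training code):

import torch

x = torch.ones(25, 300)   # a 2-D tensor
y = torch.tensor(5)       # a 0-dimensional (scalar) tensor

print(x.dim(), x.shape)   # 2 torch.Size([25, 300])
print(y.dim(), y.shape)   # 0 torch.Size([])
print(y.dim() == 0)       # True -> y.size(0) would raise the IndexError shown below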

METHODS

METHOD
def pad_sequence(sequences, batch_first=False, padding_value=0):
    r"""Pad a list of variable length Tensors with ``padding_value``

    ``pad_sequence`` stacks a list of Tensors along a new dimension,
    and pads them to equal length. For example, if the input is list of
    sequences with size ``L x *`` and if batch_first is False, and ``T x B x *``
    otherwise.

    `B` is batch size. It is equal to the number of elements in ``sequences``.
    `T` is length of the longest sequence.
    `L` is length of the sequence.
    `*` is any number of trailing dimensions, including none.

    Example:
        >>> from torch.nn.utils.rnn import pad_sequence
        >>> a = torch.ones(25, 300)
        >>> b = torch.ones(22, 300)
        >>> c = torch.ones(15, 300)
        >>> pad_sequence([a, b, c]).size()
        torch.Size([25, 3, 300])

    Note:
        This function returns a Tensor of size ``T x B x *`` or ``B x T x *``
        where `T` is the length of the longest sequence. This function assumes
        trailing dimensions and type of all the Tensors in sequences are same.

    Arguments:
        sequences (list[Tensor]): list of variable length sequences.
        batch_first (bool, optional): output will be in ``B x T x *`` if True, or in
            ``T x B x *`` otherwise
        padding_value (float, optional): value for padded elements. Default: 0.

    Returns:
        Tensor of size ``T x B x *`` if :attr:`batch_first` is ``False``.
        Tensor of size ``B x T x *`` otherwise
    """
    print("test "+ str(padding_value))
    # assuming trailing dimensions and type of all the Tensors
    # in sequences are same and fetching those from sequences[0]
    max_size = sequences[0].size()
    trailing_dims = max_size[1:]
    max_len = max([s.size(0) for s in sequences])
    if batch_first:
        out_dims = (len(sequences), max_len) + trailing_dims
    else:
        out_dims = (max_len, len(sequences)) + trailing_dims

    out_tensor = sequences[0].data.new(*out_dims).fill_(padding_value)
    for i, tensor in enumerate(sequences):
        length = tensor.size(0)
        # use index notation to prevent duplicate references to the tensor
        if batch_first:
            out_tensor[i, :length, ...] = tensor
        else:
            out_tensor[:length, i, ...] = tensor

    return out_tensor


METHOD
def pad_batch(batch):
    
    #for i in batch:
     # print("1" + str(i[0]))
      #print("2" + str(i[1]))
    padded_inputs = pad_sequence(batch[0],batch_first=True,padding_value=EOS_token)
    padded_targets = pad_sequence(batch[1],batch_first=False,padding_value=EOS_token)
    return (padded_inputs, padded_targets)
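For context, the pad_sequence above fails with exactly this IndexError when any element of sequences is a zero-dimensional (scalar) tensor, because s.size(0) needs at least one dimension. A minimal sketch that reproduces it with hypothetical tensors (values chosen only for illustration):

import torch

ok = [torch.ones(3, dtype=torch.long), torch.ones(2, dtype=torch.long)]
print(pad_sequence(ok, batch_first=True, padding_value=1).shape)
# torch.Size([2, 3])

broken = [torch.ones(3, dtype=torch.long), torch.tensor(1)]  # second element is 0-dim
pad_sequence(broken, batch_first=True, padding_value=1)
# IndexError: dimension specified as 0 but tensor has no dimensions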

METHOD
"""seperates data into batches of size batch_size"""
def batchify(data, input_lang, batch_size, shuffle_data=True):
    if shuffle_data == True:
        shuffle(data)
    number_of_batches = len(data) // batch_size
    batches = list(range(number_of_batches))
    longest_elements = list(range(number_of_batches))
   
    for batch_number in range(number_of_batches):
        longest_input = 0
        longest_target = 0
        input_variables = list(range(batch_size))
         #target_variables = list(range(batch_size))
        index = 0      
        for pair in range((batch_number*batch_size),((batch_number+1)*batch_size)):
            input_variables[index]  = tensorsFromPair(input_lang, data[pair])
            #print(range((batch_number*batch_size),((batch_number+1)*batch_size)))
            if len(input_variables[index]) >= longest_input:
                longest_input = len(input_variables[index])
            #if len(target_variables[index]) >= longest_target:
                #longest_target = len(target_variables[index])
            index += 1
        batches[batch_number] = (input_variables)
        longest_elements[batch_number] = (longest_input)
    return batches , longest_elements, number_of_batches

METHOD
def train_and_test(epochs, test_eval_every, plot_every, learning_rate, 
                   lr_schedule, train_pairs, test_pairs, input_lang
                  , batch_size, test_batch_size, encoder, decoder, 
                   loss_criterion, trim, save_weights):
    
    times = []
    losses = {'train set':[], 'test set': []}

    test_batches, longest_seq, n_o_b = batchify(test_pairs, input_lang, 
                                               test_batch_size, 
                                              shuffle_data=False)

    start = time.time()
    for i in range(1,epochs+1):
    
        #adjust the learning rate according to the learning rate schedule
        #specified in lr_schedule
        if i in lr_schedule.keys():
            learning_rate /= lr_schedule.get(i)


        encoder.train()
        decoder.train()

        encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
        decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

        batches, longest_seq, n_o_b = batchify(train_pairs, input_lang, 
                                            batch_size, 
                                           shuffle_data=True)
        train_loss = train(batches, encoder, decoder, encoder_optimizer, 
                       decoder_optimizer, loss_criterion)
        
        now = time.time()
        print("Iter: %s \nLearning Rate: %s \nTime: %s \nTrain Loss: %s \n" 
#           % (i, learning_rate, asHours(now-start), train_loss))

        if create_txt:
            with open(print_to, 'a') as f:
                f.write("Iter: %s \nLeaning Rate: %s \nTime: %s \nTrain Loss: %s \n" \
#                   % (i, learning_rate, asHours(now-start), train_loss))

        if i % test_eval_every == 0:
            if test_pairs:
                test_loss = test(test_batches, encoder, decoder, criterion)
                print("Test set loss: %s" % (test_loss))
                if create_txt:
                    with open(print_to, 'a') as f:
                        f.write("Test Loss: %s \n" % (test_loss))
                evaluate_randomly(encoder, decoder, test_pairs, trim)
            else:
                evaluate_randomly(encoder, decoder, train_pairs, trim)

        if i % plot_every == 0:
            times.append((time.time()-start)/60)
            losses['train set'].append(train_loss)
            if test_pairs:
                losses['test set'].append(test_loss)
            showPlot(times, losses, output_file_name)
            if save_weights:
                torch.save(encoder.state_dict(), output_file_name+'_enc_weights.pt')
                torch.save(decoder.state_dict(), output_file_name+'_dec_weights.pt')


CALLING THE TRAINING METHOD
input_lang_name = 'test1.txt'
#output_lang_name = 'en'

"""name of your dataset"""
dataset = 'orig'

"""file path of dataset in the form of a tuple. If translated sentences are
stored in two files, this tuple will have two elements"""
raw_data_file_path = ('test1.txt',)

"""True if you want to reverse the order of the sentence pairs. For example, 
in our dataset the sentence pairs list the English sentence first followed by
the French translation. But we want to translate from French to English,
so we set reverse as True."""
reverse=True

"""Remove sentences from dataset that are longer than trim (in either language)"""
trim = 10

"""max number of words in the vocabulary for both languages"""
max_vocab_size= 20000

"""if true removes sentences from the dataset that don't start with eng_prefixes.
Typically will want to use False, but implemented to compare results with Pytorch
tutorial. Can also change the eng_prefixes to prefixes of other languages or
other English prefixes. Just be sure that the prefixes apply to the OUTPUT
language (i.e. the language that the model is translating to NOT from)"""
start_filter = True

"""denotes what percentage of the data to use as training data. the remaining 
percentage becomes test data. Typically want to use 0.8-0.9. 1.0 used here to 
compare with PyTorch results where no test set was utilized"""
perc_train_set = 1.0

"""OUTPUT OPTIONS"""

"""denotes how often to evaluate a loss on the test set and print
sample predictions on the test set.
if no test set, simply prints sample predictions on the train set."""
test_eval_every = 1

"""denotes how often to plot the loss values of train and test (if applicable)"""
plot_every = 1

"""if true creates a txt file of the output"""
create_txt = True

"""if true saves the encoder and decoder weights to seperate .pt files for later use"""
save_weights= False

"""HYPERPARAMETERS: FEEL FREE TO PLAY WITH THESE TO TRY TO ACHIEVE BETTER RESULTS"""

"""signifies whether the Encoder and Decoder should be bidirectional LSTMs or not"""
bidirectional = True
if bidirectional:
    directions = 2
else:
    directions = 1

"""number of layers in both the Encoder and Decoder"""
layers = 2

"""Hidden size of the Encoder and Decoder"""
hidden_size = 440

"""Dropout value for Encoder and Decoder"""
dropout = 0.2

"""Training set batch size"""
batch_size = 32

"""Test set batch size"""
test_batch_size = 32

"""number of epochs (full passes through the training data)"""
epochs = 100

"""Initial learning rate"""
learning_rate = 1

"""Learning rate schedule. Signifies by what factor to divide the learning rate
at a certain epoch. For example {5:10} would divide the learning rate by 10
before the 5th epoch and {5:10, 10:100} would divide the learning rate by 10
before the 5th epoch and then again by 100 before the 10th epoch"""
lr_schedule = {}

"""loss criterion, see https://pytorch.org/docs/stable/nn.html for other options"""
criterion = nn.NLLLoss()

# Commented out IPython magic to ensure Python compatibility.
"""******************************************************************
********************NO NEED TO ALTER ANYTHING BELOW******************
******************************************************************"""

use_cuda = torch.cuda.is_available()


"""for plotting of the loss"""
plt.switch_backend('agg')

output_file_name = "testdata.%s_trim.%s_vocab.%s_directions.%s_layers.%s_hidden.%s_dropout.%s_learningrate.%s_batch.%s_epochs.%s" % (dataset,trim,max_vocab_size,directions,layers,hidden_size,dropout,learning_rate,batch_size,epochs)

if create_txt:
    print_to = output_file_name+'.txt'
    with open(print_to, 'w+') as f:
        f.write("Starting Training \n")
else:
    print_to = None

input_lang, train_pairs, test_pairs = prepareData(
    input_lang_name, raw_data_file_path, 
    max_vocab_size=max_vocab_size, reverse=reverse, trim=trim, 
    start_filter=start_filter, perc_train_set=perc_train_set, print_to=print_to)
print('Train Pairs #')
print(len(train_pairs))


"""for gradient clipping from 
https://github.com/pytorch/examples/blob/master/word_language_model/main.py"""
parser = argparse.ArgumentParser(description='PyTorch Wikitext-2 RNN/LSTM Language Model')
parser.add_argument('--clip', type=float, default=0.25,
                    help='gradient clipping')
args = parser.parse_args()

mem()

if create_txt:
    with open(print_to, 'a') as f:
        f.write("\nRandom Train Pair: %s \n\nRandom Test Pair: %s \n\n" 
#             % (random.choice(train_pairs),random.choice(test_pairs) 
               if test_pairs else "None"))
        f.write(mem())


"""create the Encoder"""
encoder = EncoderRNN(input_lang.vocab_size, hidden_size, layers=layers, 
                     dropout=dropout, bidirectional=bidirectional)

"""create the Decoder"""
decoder = DecoderAttn(hidden_size, input_lang.vocab_size, layers=layers, 
                      dropout=dropout, bidirectional=bidirectional)

print('Encoder and Decoder Created')
mem()

if use_cuda:
    print('Cuda being used')
    encoder = encoder.cuda()
    decoder = decoder.cuda()

print('Number of epochs: '+str(epochs))

if create_txt:
    with open(print_to, 'a') as f:
        f.write('Encoder and Decoder Created\n')
        f.write(mem())
        f.write("Number of epochs %s \n" % (epochs))

train_and_test(epochs, test_eval_every, plot_every, learning_rate, lr_schedule, 
               train_pairs, test_pairs, input_lang, batch_size, 
               test_batch_size, encoder, decoder, criterion, trim, save_weights)





Error Message
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-359-5d8455e6bed2> in <module>()
     70 train_and_test(epochs, test_eval_every, plot_every, learning_rate, lr_schedule, 
     71                train_pairs, test_pairs, input_lang, batch_size,
---> 72                test_batch_size, encoder, decoder, criterion, trim, save_weights)

4 frames
<ipython-input-354-56663cee9c3b> in train_and_test(epochs, test_eval_every, plot_every, learning_rate, lr_schedule, train_pairs, test_pairs, input_lang, batch_size, test_batch_size, encoder, decoder, loss_criterion, trim, save_weights)
     36                                            shuffle_data=True)
     37         train_loss = train(batches, encoder, decoder, encoder_optimizer, 
---> 38                        decoder_optimizer, loss_criterion)
     39 
     40                 now = time.time()

<ipython-input-348-941378bc39df> in train(train_batches, encoder, decoder, encoder_optimizer, decoder_optimizer, loss_criterion)
      8         for batch in train_batches:
      9                 i += 1
---> 10                 (input_batch, target_batch) = pad_batch(batch)
     11                 batch_loss = train_batch(input_batch, target_batch, encoder, decoder, encoder_optimizer, decoder_optimizer, loss_criterion)
     12                 round_loss += batch_loss

<ipython-input-343-bfbf80f81df7> in pad_batch(batch)
     33      # print("1" + str(i[0]))
     34       #print("2" + str(i[1]))
---> 35     padded_inputs = pad_sequence(batch[0],batch_first=True,padding_value=EOS_token)
     36     padded_targets = pad_sequence(batch[1],batch_first=False,padding_value=EOS_token)
     37     return (padded_inputs, padded_targets)

<ipython-input-337-c13d37c75733> in pad_sequence(sequences, batch_first, padding_value)
     40     max_size = sequences[0].size()
     41     trailing_dims = max_size[1:]
---> 42     max_len = max([s.size(0) for s in sequences])
     43     if batch_first:
     44         out_dims = (len(sequences), max_len) + trailing_dims

<ipython-input-337-c13d37c75733> in <listcomp>(.0)
     40     max_size = sequences[0].size()
     41     trailing_dims = max_size[1:]
---> 42     max_len = max([s.size(0) for s in sequences])
     43     if batch_first:
     44         out_dims = (len(sequences), max_len) + trailing_dims

IndexError: dimension specified as 0 but tensor has no dimensions

1 Answer:

Answer 0: (score: 0)

As explained here, the problem occurs when the last batch contains only one sample. A simple workaround is to remove one sample from your set, or to change your batch size.
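A minimal sketch of that workaround, reusing the train_pairs and batch_size names from the question (assuming train_pairs is a plain Python list of sentence pairs):

# Option 1: drop one sample so no batch is left with a single element.
if len(train_pairs) % batch_size == 1:
    train_pairs = train_pairs[:-1]

# Option 2: keep only as many samples as fill complete batches.
usable = (len(train_pairs) // batch_size) * batch_size
train_pairs = train_pairs[:usable]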
