Question

以下内容来自我在Udacity上进行的深度学习项目。该项目在生成电视脚本上。我遇到的错误是下面的错误。以下功能是模型训练后的功能。

def generate(rnn, prime_id, int_to_vocab, token_dict, pad_value, predict_len=100):
    """
    Generate text using the neural network
    param decoder: The PyTorch Module that holds the trained neural network
    param prime_id: The word id to start the first prediction
    param int_to_vocab: Dict of word id keys to word values
    param token_dict: Dict of puncuation tokens keys to puncuation values
    param pad_value: The value used to pad a sequence
    param predict_len: The length of text to generate
    return: The generated text
    """
    rnn.eval()

    # create a sequence (batch_size=1) with the prime_id
    current_seq = np.full((1, sequence_length), pad_value)
    current_seq[-1][-1] = prime_id
    predicted = [int_to_vocab[prime_id]]

    for _ in range(predict_len):
        if train_on_gpu:
            current_seq = torch.LongTensor(current_seq).cuda()
        else:
            current_seq = torch.LongTensor(current_seq)

        # initialize the hidden state
        hidden = rnn.init_hidden(current_seq.size(0))

        # get the output of the rnn
        output, _ = rnn(current_seq, hidden)

        # get the next word probabilities
        p = F.softmax(output, dim=1).data
        if(train_on_gpu):
            p = p.cpu() # move to cpu

        # use top_k sampling to get the index of the next word
        top_k = 5
        p, top_i = p.topk(top_k)
        top_i = top_i.numpy().squeeze()

        # select the likely next word index with some element of randomness
        p = p.numpy().squeeze()
        word_i = np.random.choice(top_i, p=p/p.sum())

        # retrieve that word from the dictionary
        word = int_to_vocab[word_i]
        predicted.append(word)     

        # the generated word becomes the next "current sequence" and the cycle can continue
        current_seq = np.roll(current_seq, -1, 1)
        current_seq[-1][-1] = word_i

    gen_sentences = ' '.join(predicted)

    # Replace punctuation tokens
    for key, token in token_dict.items():
        ending = ' ' if key in ['\n', '(', '"'] else ''
        gen_sentences = gen_sentences.replace(' ' + token.lower(), key)
    gen_sentences = gen_sentences.replace('\n ', '\n')
    gen_sentences = gen_sentences.replace('( ', '(')

    # return all the sentences
    return gen_sentences

此后，将运行以下代码：

# run the cell multiple times to get different results!
gen_length = 400 # modify the length to your preference
prime_word = 'jerry' # name for starting the script

pad_word = helper.SPECIAL_WORDS['PADDING']
generated_script = generate(trained_rnn, vocab_to_int[prime_word + ':'], int_to_vocab, token_dict, vocab_to_int[pad_word], gen_length)
print(generated_script)

运行此代码后，出现以下错误

TypeError                                 Traceback (most recent call last)
<ipython-input-40-68a17c4d1704> in <module>()
      7 """
      8 pad_word = helper.SPECIAL_WORDS['PADDING']
----> 9 generated_script = generate(trained_rnn, vocab_to_int[prime_word + ':'], int_to_vocab, token_dict, vocab_to_int[pad_word], gen_length)
     10 print(generated_script)

3 frames
<ipython-input-39-b86c7a305356> in generate(rnn, prime_id, int_to_vocab, token_dict, pad_value, predict_len)
     53 
     54         # the generated word becomes the next "current sequence" and the cycle can continue
---> 55         current_seq = np.roll(current_seq, -1, 1)
     56         current_seq[-1][-1] = word_i
     57 

<__array_function__ internals> in roll(*args, **kwargs)

/usr/local/lib/python3.6/dist-packages/numpy/core/numeric.py in roll(a, shift, axis)
   1179 
   1180     """
-> 1181     a = asanyarray(a)
   1182     if axis is None:
   1183         return roll(a.ravel(), shift, 0).reshape(a.shape)

/usr/local/lib/python3.6/dist-packages/numpy/core/_asarray.py in asanyarray(a, dtype, order)
    136 
    137     """
--> 138     return array(a, dtype, copy=False, order=order, subok=True)
    139 
    140 

/usr/local/lib/python3.6/dist-packages/torch/tensor.py in __array__(self, dtype)
    490     def __array__(self, dtype=None):
    491         if dtype is None:
--> 492             return self.numpy()
    493         else:
    494             return self.numpy().astype(dtype, copy=False)

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

有人可以帮我吗？

Answer 1

np.roll(current_seq, -1, 1)要求输入为NumPy数组，但是current_seq为张量，因此它尝试将其转换为NumPy数组，但失败，因为张量在GPU上。为了将其转换为NumPy数组，您需要在CPU上具有张量。

current_seq = np.roll(current_seq.cpu(), -1, 1)

TypeError：无法将cuda：0设备类型张量转换为numpy。使用Tensor.cpu（）首先将张量复制到主机内存

1 个答案: