I'm not very familiar with Python or machine learning code. I'm running a test on a PyTorch CBOW model, but it raises an IndexError. Can anyone help?
# model class
class CBOW(nn.Module):
    ...
    def get_word_embedding(self, word):
        word = torch.cuda.LongTensor([word_to_ix[word]])
        return self.embeddings(word).view(1, -1)

# test method
def test_cbow(model, train_words, word_to_ix):
    # test word similarity
    word_1 = train_words[2]  # randomly chosen word
    word_2 = train_words[3]  # randomly chosen word
    word_1_vec = model.get_word_embedding(word_1)[0].cpu()
    word_2_vec = model.get_word_embedding(word_2)[0].cpu()
    print(word_1_vec)
    print(word_2_vec)
    word_similarity = (word_1_vec.dot(word_2_vec) / (torch.norm(word_1_vec) * torch.norm(word_2_vec))).data.numpy()[0]
    print("Similarity between '{}' & '{}' : {:0.4f}".format(word_1, word_2, word_similarity))

# executing the test
test_cbow(model, train_words, word_to_ix)
The output is as follows:
tensor([ 0.8978, 1.0713, -1.6856, -1.0967, -0.0114, 0.4107, -0.4293, -0.7351,
0.4410, -1.5937, -1.3773, 0.7744, 0.0739, -0.3263, 1.0342, 1.0420,
-1.1333, 0.4158, 1.1316, -0.0141, -0.8383, 0.2544, -2.2409, -1.1858,
0.2652, -0.3232, 0.1287, -1.5274, 0.3199, -2.1822, 0.9464, -0.6619,
1.1549, 0.5276, 0.0849, -0.1594, -1.7922, 1.3567, -0.4376, -0.9093,
1.0701, 1.5373, -1.3277, -1.1833, 1.8070, -0.0551, -0.8439, 1.5236,
-0.3890, -0.2306, -0.7392, -1.6435, 0.4485, 0.8988, -0.5958, -0.6989,
1.6123, -1.6668, 0.0583, 0.6698, -0.6998, 1.1942, 0.6355, 0.7437,
-1.0006, -0.5398, 1.3197, 1.3696, -0.3221, 0.9004, 0.6268, 0.0221,
0.0269, -1.7966, -1.6153, -0.1695, -0.0339, -0.5145, 1.5744, -0.3388,
-0.9617, 0.6750, -1.1334, 0.0377, 1.1123, 1.1002, -0.3605, 0.2105,
-1.6570, 1.3818, 0.9183, 0.0274, 0.9072, 0.8414, 0.3424, 0.2199,
1.6546, -0.1357, 1.1291, -0.5309], grad_fn=<CopyBackwards>)
tensor([-0.6263, -0.5639, 2.1590, -0.3659, 0.2862, -0.4542, -0.4825, -0.1776,
-0.4242, 0.9525, 0.7138, -0.3107, 1.8733, -0.3406, 0.0277, 1.6775,
2.1893, 2.0332, 0.7185, 0.0050, -0.1627, -0.1113, 1.0444, 1.4057,
0.2183, 0.3405, 0.0930, 1.2428, -0.0740, 0.3991, -0.2722, 1.4980,
0.9207, 0.5008, -1.9297, 0.5600, 1.6416, 1.1550, 0.1440, 0.0739,
-0.7465, -0.2458, 0.9217, 0.7156, -1.2558, -0.9891, -0.7313, 0.8501,
-1.2851, -0.3068, -0.0796, 0.9361, 0.0927, -1.2988, 0.7422, 0.1388,
1.3895, -0.7935, 0.4008, -0.1338, 1.5563, 0.5864, 0.6606, -0.2341,
0.1218, -0.7313, 0.5073, -0.2941, 0.0316, -2.5356, -0.0885, 2.5765,
0.2090, 0.2819, -0.0386, 0.7986, 2.1165, -0.0271, -0.2987, 0.2905,
0.0149, 0.2403, 0.0752, -1.5535, 0.3794, 2.0638, 1.0603, 0.0703,
-0.3643, -1.5671, -0.4736, -1.3035, 0.6583, 0.2531, 0.9829, -0.6025,
-0.8148, -0.3457, -0.7339, 0.6758], grad_fn=<CopyBackwards>)
I'm also confused about having to convert the CUDA data type to numpy, since I used CUDA inside the get_word_embedding method. Is adding .cpu() the correct way to convert the data type?
IndexError Traceback (most recent call last)
<ipython-input-68-39d73aa6e0de> in <module>()
17 print("Similarity between '{}' & '{}' : {:0.4f}".format(word_1, word_2, word_similarity))
18
---> 19 test_cbow(model, train_words, word_to_ix)
<ipython-input-68-39d73aa6e0de> in test_cbow(model, train_words, word_to_ix)
14 print(type(word_1_vec))
15
---> 16 word_similarity = (word_1_vec.dot(word_2_vec) / (torch.norm(word_1_vec) * torch.norm(word_2_vec))).data.numpy()[0]
17 print("Similarity between '{}' & '{}' : {:0.4f}".format(word_1, word_2, word_similarity))
18
IndexError: too many indices for array
Answer 0 (score: 1)
In your code, the result of .data.numpy() is a zero-dimensional (scalar) array, so it has no 0th element to index. Just change the line to:
word_similarity = (word_1_vec.dot(word_2_vec) / (torch.norm(word_1_vec) * torch.norm(word_2_vec))).data.numpy()
Alternatively, you can use:
word_similarity = (word_1_vec.dot(word_2_vec) / (torch.norm(word_1_vec) * torch.norm(word_2_vec))).item()
Here, since the similarity value is a scalar, PyTorch's .item() will give you the float value directly.
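For reference, here is a minimal sketch of how the corrected test function could look, reusing the names from your question (model, train_words, word_to_ix). Only the similarity line changes; the function name test_cbow_fixed and the commented cosine_similarity call are just illustrative alternatives, not part of your original code.

# minimal sketch of the corrected test function (illustrative name)
def test_cbow_fixed(model, train_words, word_to_ix):
    word_1 = train_words[2]
    word_2 = train_words[3]
    # .cpu() moves the embeddings back to host memory
    word_1_vec = model.get_word_embedding(word_1)[0].cpu()
    word_2_vec = model.get_word_embedding(word_2)[0].cpu()
    # cosine similarity: the result is a zero-dimensional tensor, so .item()
    # extracts the Python float directly (no .numpy()[0] needed)
    word_similarity = (word_1_vec.dot(word_2_vec) /
                       (torch.norm(word_1_vec) * torch.norm(word_2_vec))).item()
    # torch.nn.functional.cosine_similarity(word_1_vec, word_2_vec, dim=0).item()
    # would compute the same value with a built-in call
    print("Similarity between '{}' & '{}' : {:0.4f}".format(word_1, word_2, word_similarity))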
When should you use .cpu()?
To convert a CUDA tensor to a CPU tensor, you need .cpu(). You cannot convert a CUDA tensor to numpy directly; you must first move it to a CPU tensor and then call .numpy(). So yes, adding .cpu() in your test method before calling .numpy() is the right approach.
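As a quick illustration, here is a toy sketch of that conversion pattern (it assumes a CUDA device is available and is not code from your model):

# toy example: converting a GPU tensor to numpy
t = torch.randn(3, device="cuda")   # tensor stored on the GPU
# t.numpy()                         # would raise an error: numpy only reads CPU memory
arr = t.cpu().numpy()               # copy to the CPU first, then convert
# if the tensor is attached to the autograd graph, detach it as well:
# arr = t.detach().cpu().numpy()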