我一直在尝试使用gensim的word2vec实现嵌入层。我已经使用pandas加载了数据,数据是文本类型,涉及到word2vec部分:
embedding_weights = train_word2vec(y_train, vocab['w2idx'],
num_features=embedding_dim, min_word_count=min_word_count, context=context)
input_shape = (sequence_length,)
model_input = Input(shape=input_shape)
layer = Embedding(len(vocab['idx2w']), embedding_dim,
input_length=sequence_length, name="embedding")(model_input)
layer = Dropout(dropout_prob[0])(layer)
我不断收到此错误:
File "<ipython-input-9-423d0e432e5b>", line 3, in <module>
min_word_count=min_word_count, context=context)
File "C:\Users\ACER\Pod_Dsgn_Chatbot\Wor2vec.py", line 43, in
train_word2vec
for key, word in vocabulary_inv.items()}
File "C:\Users\ACER\Pod_Dsgn_Chatbot\Wor2vec.py", line 43, in <dictcomp>
for key, word in vocabulary_inv.items()}
File "C:\Users\ACER\Anaconda3\envs\py37\lib\site-
packages\gensim\utils.py", line 1398, in new_func1
return func(*args, **kwargs)
File "C:\Users\ACER\Anaconda3\envs\py37\lib\site-
packages\gensim\models\word2vec.py", line 821, in __getitem__
return self.wv.__getitem__(words)
File "C:\Users\ACER\Anaconda3\envs\py37\lib\site-
packages\gensim\models\keyedvectors.py", line 171, in __getitem__
return vstack([self.get_vector(entity) for entity in entities])
TypeError: 'int' object is not iterable
我不知道如何解决它,并且互联网上没有太多信息,而且该错误与所示代码的第一行有关。最后请注意,我清除了数据,并删除了每个整数,谢谢
编辑:这是发生问题的函数,PS:我没有开发这个
def train_word2vec(sentence_matrix, vocabulary_inv,
num_features=300, min_word_count=1, context=10):
model_dir = 'models'
model_name = "{:d}features_{:d}minwords_{:d}context".format(num_features,
min_word_count, context)
model_name = join(model_dir, model_name)
if exists(model_name):
embedding_model = word2vec.Word2Vec.load(model_name)
print('Load existing Word2Vec model \'%s\'' % split(model_name)[-1])
else:
num_workers = 2
downsampling = 1e-3
print('Training Word2Vec model...')
sentences = [[vocabulary_inv[w] for w in s] for s in sentence_matrix]
embedding_model = word2vec.Word2Vec(sentences, workers=num_workers,
size=num_features, min_count=min_word_count, window=context,
sample=downsampling)
embedding_model.init_sims(replace=True)
if not exists(model_dir):
os.mkdir(model_dir)
print('Saving Word2Vec model \'%s\'' % split(model_name)[-1])
embedding_model.save(model_name)
pyplot.scatter( embedding_model)
embedding_weights = {key: embedding_model[word] if word in
embedding_model
else
np.random.uniform(-0.25, 0.25,
embedding_model.vector_size)
for key, word in vocabulary_inv.items()}
return embedding_weights