这是我的代码:
def extract_doc2vec(doc, w2v):
    """Return a document vector: the element-wise sum of its word vectors.

    Args:
        doc: Iterable of tokens.
        w2v: Word-vector lookup supporting ``word in w2v`` and ``w2v[word]``
            (the caller in this file passes ``model.wv``).

    Returns:
        The sum over axis 0 of the vectors of every token found in ``w2v``.
        When no token is found: a zero vector of length ``w2v.vector_size``
        if ``w2v`` has that attribute, otherwise an empty list.
    """
    vecs = [w2v[word] for word in doc if word in w2v]
    if vecs:
        return np.asarray(vecs).sum(axis=0)

    # No token is in the vocabulary. Returning a zero vector of the model's
    # dimensionality keeps every row the same length, so the collected
    # results can be stacked into one 2-D array.
    size = getattr(w2v, "vector_size", None)
    if size is None:
        # Dimensionality unknown (e.g. a plain dict): keep the previous
        # behaviour and hand back the empty list.
        return vecs
    return np.zeros(size)
# Labels for every document.
y = newsgroups_all['target']

# x[i] is the summed word vector built from newsgroups_all['data'][i].
x = []
for doc in newsgroups_all['data']:
    # Tokenize the raw text, then sum the vectors of the known tokens.
    edv = extract_doc2vec(list(gensim.utils.tokenize(doc)), model.wv)
    x.append(edv)
我必须将 retrain_doc 转换为合适的格式,才能运行下面的拆分代码:
# A plain Python list cannot be subscripted with an array of indices (it
# raises TypeError), so convert both containers to NumPy arrays first.
# np.asarray(x) needs every row of x to have the same length.
x_arr, y_arr = np.asarray(x), np.asarray(y)

# NOTE(review): sssd is presumably a scikit-learn splitter whose split()
# yields (train_index, test_index) pairs of index arrays -- confirm.
for train_index, test_index in sssd.split(x_arr, y_arr):
    X_train, X_test = x_arr[train_index], x_arr[test_index]
    y_train, y_test = y_arr[train_index], y_arr[test_index]
答案 0 :(得分:0)
先将列表 x 转换为 DataFrame,再将其转换为 NumPy 数组,然后应用随机拆分:
# Build a DataFrame from x, then take its NumPy array form, which (unlike a
# plain list) can be subscripted with arrays of indices.
data = pd.DataFrame(x).to_numpy()