我正在使用word2vec建立模型。训练模型后,我使用了余弦相似度。但我收到以下错误。我正在使用python 3我使用的代码如下:
import numpy as np
from sklearn.metrics.pairwise import cosine_distances
cos_dist =[]
#cos_dist =[cos_dist]
#cos_dist = np.array(cos_dist).reshape(1, -1)
for vec in data[:-1]:
cos_dist.append(float(cosine_distances(vec,data[-1])))
#cos_dist = np.append(cos_dist,
[float(cosine_distances(vec,data[-1]))])
ps = PorterStemmer()
key_list =[]
for j in jd[:-1]:
key = ''
w = set()
for word in keywords(j).split('\n'):
w.add(ps.stem(word))
for x in w:
key += '{} '.format(x)
key_list.append(key)
summary = pd.DataFrame({
'Company': companies,
'Postition': positions,
'Cosine Distances': cos_dist,
'Keywords': key_list,
'Job Description': jd[:-1]
})
z =summary.sort('Cosine Distances', ascending=False)
z.to_csv('Summaryimproved.csv',encoding="utf-8")
#z= z.reshape(1, -1)
我收到以下错误。如果需要其他详细信息,请告诉。
~/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py in
check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy,
force_all_finite, ensure_2d, allow_nd, ensure_min_samples,
ensure_min_features, warn_on_dtype, estimator)
519 "Reshape your data either using
array.reshape(-1, 1) if "
520 "your data has a single feature or
array.reshape(1, -1) "
--> 521 "if it contains a single
sample.".format(array))
522
523 # in the future np.flexible dtypes will be handled like
object dtypes
ValueError: Expected 2D array, got 1D array instead:
array=[-2.84130724e-06 2.24336206e-06 7.59608733e-06 1.09044276e-05