在训练word2vec之后,我无法对向量进行标准化。我遇到错误:
输入包含NaN,无穷大或dtype('float64')太大的值。
我的代码和错误跟踪如下。我的word2vec向量是什么类型-它们是列表吗?而我该如何消除此错误?
# average Word2Vec
# compute average word2vec for each review of train Dataset
sent_vectors_train = [];
for sent in list_of_sent_train: # for each review/sentence
sent_vec = np.zeros(50) # as word vectors are of zero length
cnt_words =0; # num of words with a valid vector in the sentence/review
for word in sent: # for each word in a review/sentence
try:
vec = w2v_model.wv[word]
sent_vec += vec
cnt_words += 1
except:
pass
sent_vec /= cnt_words
sent_vectors_train.append(sent_vec)
print(len(sent_vectors_train))
print(len(sent_vectors_train[0]))
output:-7000
50
from sklearn.preprocessing import StandardScaler
standardized_data =
StandardScaler(with_mean=False).fit_transform(sent_vectors_train)
print(standardized_data.shape)
ValueError Traceback (most recent call last)
<ipython-input-47-cb2854c305eb> in <module>()
1 #from sklearn.preprocessing import StandardScaler
2 from sklearn.preprocessing import StandardScaler
----> 3 standardized_data = StandardScaler(with_mean=False).fit_transform(sent_vectors_train)
4 print(standardized_data.shape)
~\Anaconda3\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
515 if y is None:
516 # fit method of arity 1 (unsupervised transformation)
--> 517 return self.fit(X, **fit_params).transform(X)
518 else:
519 # fit method of arity 2 (supervised transformation)
~\Anaconda3\lib\site-packages\sklearn\preprocessing\data.py in fit(self, X, y)
588 # Reset internal state before fitting
589 self._reset()
--> 590 return self.partial_fit(X, y)
591
592 def partial_fit(self, X, y=None):
~\Anaconda3\lib\site-packages\sklearn\preprocessing\data.py in partial_fit(self, X, y)
610 """
611 X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,
--> 612 warn_on_dtype=True, estimator=self, dtype=FLOAT_DTYPES)
613
614 # Even in the case of `with_mean=False`, we update the mean anyway
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
451 % (array.ndim, estimator_name))
452 if force_all_finite:
--> 453 _assert_all_finite(array)
454
455 shape_repr = _shape_repr(array.shape)
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in _assert_all_finite(X)
42 and not np.isfinite(X).all()):
43 raise ValueError("Input contains NaN, infinity"
---> 44 " or a value too large for %r." % X.dtype)
45
46
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').