我正在尝试使用sklearn进行情感分析。我已经加载了数据并创建了词向量化,以进行分类,这里我试图将svm模型上的X和Y拟合,但是它会导致类型错误
下面您可以看到我的代码和错误消息。
请让我知道此问题的解决方法。
预先感谢
from sklearn.model_selection import train_test_split
training, test= train_test_split(reviews, test_size=0.33, random_state=42)
train_x = [x.text for x in training]
train_y = [x.sentiment for x in training]
test_x = [x.text for x in test]
test_y = [x.sentiment for x in test]
train_X[0]
train_Y[0]
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer()
train_x_Vector = vectorizer.fit_transform(train_x)
test_x_vector = vectorizer.transform(test_x)
print(train_x[0])
print(train_x_Vector[0])
from sklearn import svm
clf_svm = svm.SVC(kernel='linear')
clf_svm.fit(train_x_Vector, train_y)
-----------------------------------------------------------------------
TypeError
Traceback (most recent call last)
<ipython-input-283-1cb540dee78d> in <module>
3 clf_svm = svm.SVC(kernel='linear')
4
----> 5 clf_svm.fit(test_x_vector, test_y)
6
7 test_x[0]
/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py in fit(self, X, y, sample_weight)
145 order='C', accept_sparse='csr',
146 accept_large_sparse=False)
--> 147 y = self._validate_targets(y)
148
149 sample_weight = np.asarray([]
/opt/anaconda3/lib/python3.7/site-packages/sklearn/svm/base.py in _validate_targets(self, y)
513 def _validate_targets(self, y):
514 y_ = column_or_1d(y, warn=True)
--> 515 check_classification_targets(y)
516 cls, y = np.unique(y_, return_inverse=True)
517 self.class_weight_ = compute_class_weight(self.class_weight, cls, y_)
/opt/anaconda3/lib/python3.7/site-packages/sklearn/utils/multiclass.py in check_classification_targets(y)
164 y : array-like
165 """
--> 166 y_type = type_of_target(y)
167 if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',
168 'multilabel-indicator', 'multilabel-sequences']:
/opt/anaconda3/lib/python3.7/site-packages/sklearn/utils/multiclass.py in type_of_target(y)
285 return 'continuous' + suffix
286
--> 287 if (len(np.unique(y)) > 2) or (y.ndim >= 2 and len(y[0]) > 1):
288 return 'multiclass' + suffix # [1, 2, 3] or [[1., 2., 3]] or [[1, 2]]
289 else:
<__array_function__ internals> in unique(*args, **kwargs)
/opt/anaconda3/lib/python3.7/site-packages/numpy/lib/arraysetops.py in unique(ar, return_index, return_inverse, return_counts, axis)
260 ar = np.asanyarray(ar)
261 if axis is None:
--> 262 ret = _unique1d(ar, return_index, return_inverse, return_counts)
263 return _unpack_tuple(ret)
264
/opt/anaconda3/lib/python3.7/site-packages/numpy/lib/arraysetops.py in _unique1d(ar, return_index, return_inverse, return_counts)
308 aux = ar[perm]
309 else:
--> 310 ar.sort()
311 aux = ar
312 mask = np.empty(aux.shape, dtype=np.bool_)
TypeError: '<' not supported between instances of 'NoneType' and 'str'