我正在尝试使用ngram功能创建模型。 我的目标列具有如图所示的所有字符串值
np.unique(y)
array(['Connect, 'Terminate,
'Dispatch', 'Pending',
'Credentials'], dtype=object)
以下是我尝试执行的代码的一部分,该代码失败@ nlp_pipeline.fit(X_train, y_train)
执行test_pipeline(calldataoth, mixed_pipe)
ngram_pipe = Pipeline([
('cv', CountVectorizer(ngram_range=(1, 2))),
('mnb', MultinomialNB())
])
unigram_log_pipe = Pipeline([
('cv', CountVectorizer()),
('logreg', linear_model.LogisticRegression())
])
classifiers = [
("ngram", ngram_pipe),
("unigram", unigram_log_pipe),
]
mixed_pipe = Pipeline([
("voting", VotingClassifier(classifiers, voting="soft"))
])
y = calldataoth['CD'].astype(str)
def test_pipeline(df, nlp_pipeline):
X = pd.Series(calldataoth['callkeys'])
rskf = StratifiedKFold(n_splits=5, random_state=1)
losses = []
accuracies = []
for train_index, test_index in rskf.split(X, y):
X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]
nlp_pipeline.fit(X_train, y_train)
当我尝试执行以上代码时,出现以下错误
<ipython-input-227-68a4f04f5d00> in test_pipeline(df, nlp_pipeline)
34 X_train, X_test = X[train_index], X[test_index]
35 y_train, y_test = y[train_index], y[test_index]
---> 36 nlp_pipeline.fit(X_train, y_train)
37 print(y = column_or_1d(y, warn=True))
38 print(ar = np.asanyarray(ar))
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\pipeline.py in fit(self, X, y, **fit_params)
248 Xt, fit_params = self._fit(X, y, **fit_params)
249 if self._final_estimator is not None:
--> 250 self._final_estimator.fit(Xt, y, **fit_params)
251 return self
252
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\ensemble\voting_classifier.py in fit(self, X, y, sample_weight)
178 'required to be a classifier!')
179
--> 180 self.le_ = LabelEncoder().fit(y)
181 self.classes_ = self.le_.classes_
182 self.estimators_ = []
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\preprocessing\label.py in fit(self, y)
94 """
95 y = column_or_1d(y, warn=True)
---> 96 self.classes_ = np.unique(y)
97 return self
98
~\AppData\Local\Continuum\anaconda3\lib\site-packages\numpy\lib\arraysetops.py in unique(ar, return_index, return_inverse, return_counts, axis)
231 ar = np.asanyarray(ar)
232 if axis is None:
--> 233 ret = _unique1d(ar, return_index, return_inverse, return_counts)
234 return _unpack_tuple(ret)
235
~\AppData\Local\Continuum\anaconda3\lib\site-packages\numpy\lib\arraysetops.py in _unique1d(ar, return_index, return_inverse, return_counts)
279 aux = ar[perm]
280 else:
--> 281 ar.sort()
282 aux = ar
283 mask = np.empty(aux.shape, dtype=np.bool_)
TypeError: '<' not supported between instances of 'str' and 'float'
对此表示感谢。非常感谢