Question

我正在尝试使用 n-gram 特征创建模型。我的目标列中的值全部是字符串，如下所示：

np.unique(y)

array(['Connect', 'Terminate',
       'Dispatch', 'Pending',
       'Credentials'], dtype=object)

以下是我尝试执行的代码的一部分。执行 test_pipeline(calldataoth, mixed_pipe) 时，代码在 nlp_pipeline.fit(X_train, y_train) 处失败：

# Pipeline 1: token counts over 1-grams and 2-grams (ngram_range=(1, 2))
# feeding a multinomial naive Bayes classifier.
ngram_pipe = Pipeline([
    ('cv', CountVectorizer(ngram_range=(1, 2))),
    ('mnb', MultinomialNB())
])

# Pipeline 2: CountVectorizer with its default settings feeding a
# logistic-regression classifier.
unigram_log_pipe = Pipeline([
    ('cv', CountVectorizer()),
    ('logreg', linear_model.LogisticRegression())
])

# Named (label, estimator) pairs handed to the voting ensemble below.
classifiers = [
    ("ngram", ngram_pipe),
    ("unigram", unigram_log_pipe),
]

# Single-step pipeline whose only step is a soft-voting ensemble over the
# two pipelines defined above.
mixed_pipe = Pipeline([
    ("voting", VotingClassifier(classifiers, voting="soft"))
])
# Target labels: the 'CD' column of calldataoth cast to str. This module-level
# `y` is what test_pipeline reads.
# NOTE(review): astype(str) presumably turns missing values into the string
# 'nan', so `y` itself should hold no floats after this line -- if so, the
# float in the reported TypeError would have to be introduced later (e.g. when
# `y` is indexed) or come from a stale `y` in the notebook session. Confirm.
y = calldataoth['CD'].astype(str)

def test_pipeline(df, nlp_pipeline):
    """Fit ``nlp_pipeline`` on the training part of each stratified fold.

    Only the excerpt shown here is documented; it ends right after the
    ``fit`` call, so nothing is returned by the visible code.

    Args:
        df: Not used by the visible body, which reads the module-level
            ``calldataoth`` and ``y`` instead.
        nlp_pipeline: Object exposing ``fit(X, y)``; it is re-fitted once
            per fold.
    """

    # Features come from the global frame's 'callkeys' column, not from `df`.
    X = pd.Series(calldataoth['callkeys'])
    # NOTE(review): random_state is passed without shuffle=True; per the
    # scikit-learn docs it only matters when shuffling, and newer releases
    # reject this combination -- confirm against the installed version.
    rskf = StratifiedKFold(n_splits=5, random_state=1)
    # Neither list is appended to in the visible part of the function.
    losses = []
    accuracies = []
    for train_index, test_index in rskf.split(X, y):
        # NOTE(review): split() yields positional integer indices, but
        # X[...] / y[...] on a Series presumably selects by index *label*.
        # If calldataoth has a non-default index (e.g. after filtering rows),
        # older pandas may fill the missing labels with NaN (a float), which
        # would match "'<' not supported between 'str' and 'float'".
        # Verify whether .iloc[...] was intended here.
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        # This is the call at which the traceback quoted in the question starts.
        nlp_pipeline.fit(X_train, y_train)

当我尝试执行以上代码时，出现以下错误

<ipython-input-227-68a4f04f5d00> in test_pipeline(df, nlp_pipeline)
     34         X_train, X_test = X[train_index], X[test_index]
     35         y_train, y_test = y[train_index], y[test_index]
---> 36         nlp_pipeline.fit(X_train, y_train)
     37         print(y = column_or_1d(y, warn=True))
     38         print(ar = np.asanyarray(ar))

~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\pipeline.py in fit(self, X, y, **fit_params)
    248         Xt, fit_params = self._fit(X, y, **fit_params)
    249         if self._final_estimator is not None:
--> 250             self._final_estimator.fit(Xt, y, **fit_params)
    251         return self
    252 

~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\ensemble\voting_classifier.py in fit(self, X, y, sample_weight)
    178                              'required to be a classifier!')
    179 
--> 180         self.le_ = LabelEncoder().fit(y)
    181         self.classes_ = self.le_.classes_
    182         self.estimators_ = []

~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\preprocessing\label.py in fit(self, y)
     94         """
     95         y = column_or_1d(y, warn=True)
---> 96         self.classes_ = np.unique(y)
     97         return self
     98 

~\AppData\Local\Continuum\anaconda3\lib\site-packages\numpy\lib\arraysetops.py in unique(ar, return_index, return_inverse, return_counts, axis)
    231     ar = np.asanyarray(ar)
    232     if axis is None:
--> 233         ret = _unique1d(ar, return_index, return_inverse, return_counts)
    234         return _unpack_tuple(ret)
    235 

~\AppData\Local\Continuum\anaconda3\lib\site-packages\numpy\lib\arraysetops.py in _unique1d(ar, return_index, return_inverse, return_counts)
    279         aux = ar[perm]
    280     else:
--> 281         ar.sort()
    282         aux = ar
    283     mask = np.empty(aux.shape, dtype=np.bool_)

TypeError: '<' not supported between instances of 'str' and 'float'

如能提供任何帮助，不胜感激。非常感谢！

'str'和'float'的实例之间不支持'<'

0 个答案: