尝试在scikit-learn项目中导入eli5时出错

时间:2018-08-02 17:32:08

标签: python-3.x scikit-learn

我正在按照以下网站上的代码示例,使用scikit-learn进行命名实体识别:https://www.depends-on-the-definition.com/named-entity-recognition-conditional-random-fields-python/

    import pandas as pd
    import numpy as np

    # Load the NER corpus; the file is decoded as latin1.
    data = pd.read_csv("ner_dataset.csv", encoding="latin1")
    # Forward-fill missing cells. DataFrame.ffill() replaces
    # fillna(method="ffill"), whose `method` keyword is deprecated in
    # pandas >= 2.1.
    data = data.ffill()
    outData = data.tail(10)

    print(outData)

    # Vocabulary: the distinct values of the "Word" column.
    words = list(set(data["Word"].values))
    n_words = len(words)
    print(n_words)

    class SentenceGetter(object):
        """Group a token-level DataFrame into sentences.

        Rows are grouped by the "Sentence #" column. Each sentence becomes a
        list of (word, POS, tag) tuples built from the "Word", "POS" and "Tag"
        columns.
        """

        def __init__(self, data):
            """Group ``data`` by sentence and cache the result.

            Args:
                data: DataFrame with "Sentence #", "Word", "POS" and "Tag"
                    columns.
            """
            self.n_sent = 1  # number of the sentence get_next() looks up next
            self.data = data
            self.empty = False  # not read in this class; kept for compatibility

            def agg_func(s):
                # One (word, POS, tag) tuple per row of the sentence group.
                return list(zip(s["Word"].values.tolist(),
                                s["POS"].values.tolist(),
                                s["Tag"].values.tolist()))

            self.grouped = self.data.groupby("Sentence #").apply(agg_func)
            self.sentences = [s for s in self.grouped]

        def get_next(self):
            """Return the next sentence, or None if there is no such sentence.

            Sentences are looked up by the label "Sentence: <n>", starting at
            n = 1. The counter only advances after a successful lookup.
            """
            try:
                s = self.grouped["Sentence: {}".format(self.n_sent)]
            except KeyError:
                # Only a missing label means "no more sentences"; any other
                # error is a real problem and is allowed to propagate.
                return None
            self.n_sent += 1
            return s

    # Group the token rows of `data` into sentences.
    getter = SentenceGetter(data)

    # Fetch and print the first sentence ("Sentence: 1") as a sanity check.
    sent = getter.get_next()
    print(sent)

    # All sentences, each a list of (word, POS, tag) tuples.
    sentences = getter.sentences

    def word2features(sent, i):
        """Build the feature dict for the token at position ``i`` of ``sent``.

        Args:
            sent: sequence of token tuples; element 0 is the word and
                element 1 the POS tag, both used as strings.
            i: index of the token to describe.

        Returns:
            dict mapping feature names to values. It always holds the
            features of the token itself, plus features of the previous
            token (or 'BOS' when ``i`` is the first position) and of the
            next token (or 'EOS' when ``i`` is the last position).
        """
        word = sent[i][0]
        postag = sent[i][1]

        features = {
            'bias': 1.0,
            'word.lower()': word.lower(),
            'word[-3:]': word[-3:],
            'word[-2:]': word[-2:],
            'word.isupper()': word.isupper(),
            'word.istitle()': word.istitle(),
            'word.isdigit()': word.isdigit(),
            'postag': postag,
            'postag[:2]': postag[:2],
        }

        if i > 0:
            # Context from the preceding token.
            prev_word = sent[i-1][0]
            prev_postag = sent[i-1][1]
            features.update({
                '-1:word.lower()': prev_word.lower(),
                '-1:word.istitle()': prev_word.istitle(),
                '-1:word.isupper()': prev_word.isupper(),
                '-1:postag': prev_postag,
                '-1:postag[:2]': prev_postag[:2],
            })
        else:
            # First token: mark beginning of sentence.
            features['BOS'] = True

        if i < len(sent)-1:
            # Context from the following token.
            next_word = sent[i+1][0]
            next_postag = sent[i+1][1]
            features.update({
                '+1:word.lower()': next_word.lower(),
                '+1:word.istitle()': next_word.istitle(),
                '+1:word.isupper()': next_word.isupper(),
                '+1:postag': next_postag,
                '+1:postag[:2]': next_postag[:2],
            })
        else:
            # Last token: mark end of sentence.
            features['EOS'] = True

        return features


def sent2features(sent):
    """Return one word2features() dict per token position of ``sent``."""
    positions = range(len(sent))
    return [word2features(sent, position) for position in positions]

def sent2labels(sent):
    """Return the label (third field) of every 3-tuple in ``sent``."""
    return [tag for (_word, _pos, tag) in sent]

def sent2tokens(sent):
    """Return the token (first field) of every 3-tuple in ``sent``."""
    return [word for (word, _pos, _tag) in sent]


# Per-sentence feature dicts (X) and label sequences (y).
X = [sent2features(s) for s in sentences]
y = [sent2labels(s) for s in sentences]

from sklearn_crfsuite import CRF

# CRF model trained with the 'lbfgs' algorithm for at most 100 iterations.
# NOTE(review): c1/c2 are presumably the L1/L2 regularization coefficients —
# confirm against the sklearn-crfsuite documentation.
crf = CRF(algorithm='lbfgs',
          c1=0.1,
          c2=0.1,
          max_iterations=100,
          all_possible_transitions=False)

# Progress marker.
print("here")     

# cross_val_predict lives in sklearn.model_selection; the old
# sklearn.cross_validation module was deprecated in scikit-learn 0.18 and
# removed in 0.20.
from sklearn.model_selection import cross_val_predict
from sklearn_crfsuite.metrics import flat_classification_report

# Predictions from 5-fold cross-validation over all sentences.
pred = cross_val_predict(estimator=crf, X=X, y=y, cv=5)

# Classification report comparing the predictions with the true labels.
report = flat_classification_report(y_pred=pred, y_true=y)
print(report)

# Fit the CRF on the full data set and print whatever fit() returns.
fit = crf.fit(X, y)

print(fit)

# This import is the statement that raises the AttributeError shown in the
# traceback that follows the script.
import eli5

# NOTE(review): the return value of show_weights is discarded here; it is
# presumably meant to be displayed in a notebook — confirm.
eli5.show_weights(crf, top=30)

# Progress marker; never reached while the eli5 import fails.
print("here")

当程序执行到导入eli5模块的语句时,它在终端窗口中失败,并显示以下输出。

D:\Python\Python37-32\lib\site-packages\sklearn\ensemble\weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release.
  from numpy.core.umath_tests import inner1d
Traceback (most recent call last):
  File "d:\SOURCECODE\VisualStudioCode\hello\main.py", line 120, in <module>
    import eli5
  File "D:\Python\Python37-32\lib\site-packages\eli5\__init__.py", line 13, in <module>
    from .sklearn import explain_weights_sklearn, explain_prediction_sklearn
  File "D:\Python\Python37-32\lib\site-packages\eli5\sklearn\__init__.py", line 3, in <module>
    from .explain_weights import (
  File "D:\Python\Python37-32\lib\site-packages\eli5\sklearn\explain_weights.py", line 197, in <module>
    feature_filter=None,
  File "D:\Python\Python37-32\lib\site-packages\eli5\sklearn\explain_weights.py", line 161, in deco
    explain_weights_sklearn.register(cls)(f))
  File "D:\Python\Python37-32\lib\site-packages\singledispatch.py", line 202, in <lambda>
    return lambda f: register(cls, f)
  File "D:\Python\Python37-32\lib\site-packages\singledispatch.py", line 205, in register
    ns.cache_token = get_cache_token()
  File "D:\Python\Python37-32\lib\site-packages\singledispatch_helpers.py", line 159, in get_cache_token
    return ABCMeta._abc_invalidation_counter
AttributeError: type object 'ABCMeta' has no attribute '_abc_invalidation_counter'

1 个答案:

答案 0 :(得分:0)

在macOS上,这个问题可以通过以下命令解决。

$ conda install -c conda-forge eli5