我正在按照该网站上的代码示例，使用 scikit-learn 进行命名实体识别：https://www.depends-on-the-definition.com/named-entity-recognition-conditional-random-fields-python/
import pandas as pd
import numpy as np
# Load the token-level corpus; the CSV is decoded as latin1.
data = pd.read_csv("ner_dataset.csv", encoding="latin1")

# Forward-fill: every missing cell takes the last non-missing value above it
# in the same column. `DataFrame.ffill()` replaces the deprecated
# `fillna(method="ffill")` spelling and produces the same result.
# NOTE(review): presumably "Sentence #" is only set on the first token of each
# sentence, which is why the fill is needed -- confirm against the CSV.
data = data.ffill()

# Quick look at the last rows to check that the load and fill worked.
outData = data.tail(10)
print(outData)

# Vocabulary: the distinct values of the "Word" column (unordered).
words = list(set(data["Word"].values))
n_words = len(words)
print(n_words)
class SentenceGetter:
    """Group a token-level DataFrame into sentences.

    The frame is grouped by its "Sentence #" column; each group becomes a
    list of ``(word, pos, tag)`` tuples built from the "Word", "POS" and
    "Tag" columns.

    Attributes set by ``__init__``:
        n_sent: 1-based number of the sentence ``get_next`` returns next.
        data: the DataFrame that was passed in.
        empty: always initialised to ``False``; not used by this class.
        grouped: result of the group-by, indexed by the "Sentence #" value.
        sentences: list of all sentences, in the order of ``grouped``.
    """

    def __init__(self, data):
        """Build the per-sentence index from ``data``.

        Args:
            data: DataFrame with "Sentence #", "Word", "POS" and "Tag"
                columns.
        """
        self.n_sent = 1
        self.data = data
        self.empty = False

        def agg_func(s):
            # One (word, pos, tag) tuple per row of the group.
            return list(zip(s["Word"].values.tolist(),
                            s["POS"].values.tolist(),
                            s["Tag"].values.tolist()))

        self.grouped = self.data.groupby("Sentence #").apply(agg_func)
        self.sentences = list(self.grouped)

    def get_next(self):
        """Return the next sentence, or ``None`` when there is none.

        Sentences are looked up under the label ``"Sentence: <n_sent>"``.
        The counter only advances when the lookup succeeds.
        """
        key = "Sentence: {}".format(self.n_sent)
        try:
            s = self.grouped[key]
        except KeyError:
            # Only a missing label means "no more sentences"; anything else
            # (e.g. KeyboardInterrupt) must propagate instead of being
            # silently turned into None.
            return None
        self.n_sent += 1
        return s
# Index the corpus by sentence and keep the full list of sentences around
# for feature extraction.
getter = SentenceGetter(data)
sentences = getter.sentences

# Print the first sentence as a sanity check.
sent = getter.get_next()
print(sent)
def word2features(sent, i):
    """Build the feature dict for the token at position ``i`` of ``sent``.

    Each item of ``sent`` carries the word at index 0 and its POS tag at
    index 1. The result describes the token itself plus its immediate left
    and right neighbours. When a neighbour does not exist, the flag
    ``'BOS'`` (no left neighbour) or ``'EOS'`` (no right neighbour) is set
    instead of that neighbour's features.
    """
    token, tag = sent[i][0], sent[i][1]

    # Features of the token itself.
    feats = dict(bias=1.0)
    feats['word.lower()'] = token.lower()
    feats['word[-3:]'] = token[-3:]
    feats['word[-2:]'] = token[-2:]
    feats['word.isupper()'] = token.isupper()
    feats['word.istitle()'] = token.istitle()
    feats['word.isdigit()'] = token.isdigit()
    feats['postag'] = tag
    feats['postag[:2]'] = tag[:2]

    # Left neighbour, or the beginning-of-sentence flag.
    if i <= 0:
        feats['BOS'] = True
    else:
        prev_token, prev_tag = sent[i - 1][0], sent[i - 1][1]
        feats['-1:word.lower()'] = prev_token.lower()
        feats['-1:word.istitle()'] = prev_token.istitle()
        feats['-1:word.isupper()'] = prev_token.isupper()
        feats['-1:postag'] = prev_tag
        feats['-1:postag[:2]'] = prev_tag[:2]

    # Right neighbour, or the end-of-sentence flag.
    if i >= len(sent) - 1:
        feats['EOS'] = True
    else:
        next_token, next_tag = sent[i + 1][0], sent[i + 1][1]
        feats['+1:word.lower()'] = next_token.lower()
        feats['+1:word.istitle()'] = next_token.istitle()
        feats['+1:word.isupper()'] = next_token.isupper()
        feats['+1:postag'] = next_tag
        feats['+1:postag[:2]'] = next_tag[:2]

    return feats
def sent2features(sent):
    """Return one feature dict per position of ``sent``, in order."""
    feature_dicts = []
    for position in range(len(sent)):
        feature_dicts.append(word2features(sent, position))
    return feature_dicts
def sent2labels(sent):
    """Return the third element (the label) of every triple in ``sent``."""
    labels = []
    for _token, _postag, tag in sent:
        labels.append(tag)
    return labels
def sent2tokens(sent):
    """Return the first element (the token) of every triple in ``sent``."""
    tokens = []
    for word, _postag, _label in sent:
        tokens.append(word)
    return tokens
# One list of feature dicts and one list of labels per sentence.
X = [sent2features(s) for s in sentences]
y = [sent2labels(s) for s in sentences]

from sklearn_crfsuite import CRF

crf = CRF(
    algorithm='lbfgs',
    c1=0.1,
    c2=0.1,
    max_iterations=100,
    all_possible_transitions=False,
)
print("here")

# `sklearn.cross_validation` was removed in scikit-learn 0.20;
# `cross_val_predict` is provided by `sklearn.model_selection`.
from sklearn.model_selection import cross_val_predict
from sklearn_crfsuite.metrics import flat_classification_report

# 5-fold cross-validated predictions, then a per-label report over all tokens.
pred = cross_val_predict(estimator=crf, X=X, y=y, cv=5)
report = flat_classification_report(y_pred=pred, y_true=y)
print(report)

# Fit on the full data set so the learned weights can be inspected below.
fit = crf.fit(X, y)
print(fit)

import eli5

# NOTE(review): the return value of show_weights is discarded here, so nothing
# is printed when this runs as a plain script -- presumably this line was meant
# for a notebook; confirm.
eli5.show_weights(crf, top=30)
print("here")
程序运行到导入 eli5 模块这一步时失败，终端窗口中显示以下输出。
D:\Python\Python37-32\lib\site-packages\sklearn\ensemble\weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release.
from numpy.core.umath_tests import inner1d
Traceback (most recent call last):
File "d:\SOURCECODE\VisualStudioCode\hello\main.py", line 120, in <module>
import eli5
File "D:\Python\Python37-32\lib\site-packages\eli5\__init__.py", line 13, in <module>
from .sklearn import explain_weights_sklearn, explain_prediction_sklearn
File "D:\Python\Python37-32\lib\site-packages\eli5\sklearn\__init__.py", line 3, in <module>
from .explain_weights import (
File "D:\Python\Python37-32\lib\site-packages\eli5\sklearn\explain_weights.py", line 197, in <module>
feature_filter=None,
File "D:\Python\Python37-32\lib\site-packages\eli5\sklearn\explain_weights.py", line 161, in deco
explain_weights_sklearn.register(cls)(f))
File "D:\Python\Python37-32\lib\site-packages\singledispatch.py", line 202, in <lambda>
return lambda f: register(cls, f)
File "D:\Python\Python37-32\lib\site-packages\singledispatch.py", line 205, in register
ns.cache_token = get_cache_token()
File "D:\Python\Python37-32\lib\site-packages\singledispatch_helpers.py", line 159, in get_cache_token
return ABCMeta._abc_invalidation_counter
AttributeError: type object 'ABCMeta' has no attribute '_abc_invalidation_counter'
答案 0 :(得分:0)
在 macOS 上，可以通过以下命令解决该问题：
$ conda install -c conda-forge eli5