我使用了示例示例代码来提取实体关系。如果出现“ MacDonald去年的收入为100万美元”之类的句子,该代码将正常工作。 但是,当我尝试说“ MacDonald的收入去年为100万美元,今年为200万美元”时。它显示出与上述相同的结果。任何人都可以帮助我修改代码,以便获得所需的结果。
import spacy
TEXTS = ["MacDonald's revenue was $1 million last year and $2 million this year "]
def extract_currency_relations(doc):
# merge entities and noun chunks into one token
spans = list(doc.ents) + list(doc.noun_chunks)
for span in spans:
span.merge()
relations = []
for money in filter(lambda w: w.ent_type_ == "MONEY", doc):
if money.dep_ in ("attr", "dobj"):
#subject = [w for w in money.head.lefts if w.dep_ == "nsubj"]
subject = [w for w in money.head.lefts if w.dep_ == "nsubj"]
if subject:
subject = subject[0]
relations.append((subject, money))
elif money.dep_ == "pobj" and money.head.dep_ == "prep":
relations.append((money.head.head, money))
return relations
model="en_core_web_sm"
nlp = spacy.load(model)
print("Loaded model '%s'" % model)
print("Processing %d texts" % len(TEXTS))
for text in TEXTS:
doc = nlp(text)
relation = extract_currency_relations(doc)
for r1, r2 in relation:
print("\n\n","{:<10}\t{}\t{}".format(r1.text, r2.ent_type_, r2.text))
我希望输出为''' MacDonald的收入为MONEY $ 100万''' '''麦当劳的收入为200万美元'''
但是实际输出只是''' MacDonald的收入MONEY $ 100万'''