我想用 SpaCy 训练我自己的自定义 NER 来识别地址。
这是我的数据:
# Training examples for the custom ADDRESS entity.
# Each item is (text, {"entities": [(start_char, end_char, label)]}).
# Offsets are character positions with an EXCLUSIVE end, so text[start:end]
# must be exactly the entity text. Here the address runs from just after
# "send to: " (index 9) through the end of the string, i.e. end == len(text).
training_data = [
    ('send to: Aargauerstrasse 8005', {'entities': [(9, 29, 'ADDRESS')]}),
    ('send to: Abeggweg 8057', {'entities': [(9, 22, 'ADDRESS')]}),
    ('send to: Abendweg 8038', {'entities': [(9, 22, 'ADDRESS')]}),
    ('send to: Ackermannstrasse 8044', {'entities': [(9, 30, 'ADDRESS')]}),
    ('send to: Aehrenweg 8050', {'entities': [(9, 23, 'ADDRESS')]}),
    ('send to: Aemmerliweg 8050', {'entities': [(9, 25, 'ADDRESS')]}),
    ('send to: Albisgütliweg 8045', {'entities': [(9, 27, 'ADDRESS')]}),
    ('send to: Albisstrasse 8038', {'entities': [(9, 26, 'ADDRESS')]}),
    ('send to: Albulastrasse 8048', {'entities': [(9, 27, 'ADDRESS')]}),
    ('send to: Alderstrasse 8008', {'entities': [(9, 26, 'ADDRESS')]}),
]
我是按照这个教程(官方教程,大约在 20 分 30 秒处)来做的: https://www.youtube.com/watch?v=IqOJU1-_Fi0&t=1328s
这些是我的函数:
# CREATING BLANK MODEL
def create_blank_nlp(train_data):
    """Create a blank English pipeline whose NER knows every label in the data.

    Args:
        train_data: Iterable of ``(text, annotations)`` pairs, where
            ``annotations`` is a dict whose optional ``"entities"`` value is
            a sequence of ``(start, end, label)`` tuples.

    Returns:
        The pipeline with "transformer", "parser" and "ner" components added,
        in that order, and all entity labels registered on "ner".
    """
    nlp = spacy.blank("en")  # empty model
    # NOTE(review): the training annotations only carry "entities", so the
    # parser gets no supervision here, and "transformer" presumably needs the
    # spacy-transformers package installed -- confirm both are really wanted.
    nlp.add_pipe("transformer")
    nlp.add_pipe("parser")
    # add_pipe returns the component it created, so no separate
    # create_pipe()/get_pipe() round trip is needed.
    ner = nlp.add_pipe("ner", last=True)  # insert the custom NER
    for _, data in train_data:
        # Tolerate annotation dicts that have no "entities" key.
        for ent in data.get("entities", ()):
            ner.add_label(ent[2])
    return nlp
# The examples defined in this file are named `training_data`.
nlp = create_blank_nlp(training_data)
# initialize() is the spaCy v3 replacement for the deprecated
# begin_training(); it returns the optimizer used by nlp.update().
optimizer = nlp.initialize()

# TRAINING
for i in range(5):
    # Reshuffle every epoch so the batches differ between passes.
    random.shuffle(training_data)
    losses = {}
    sizes = compounding(1.0, 5.0, 150.0)
    batches = minibatch(training_data, size=sizes)
    for batch in batches:
        # Convert the whole batch and update once per batch; updating one
        # example at a time would make the minibatch sizes irrelevant.
        examples = [
            Example.from_dict(nlp.make_doc(text), annotations)
            for text, annotations in batch
        ]
        nlp.update(examples, drop=0.2, sgd=optimizer, losses=losses)
    print("Lossess at iteration", i, losses)
我该怎么办?