我想更新现有的spacy模型'en_core_web_sm'并使用其他数据进行训练。
我的数据格式与spacy文档中提到的格式相同 https://spacy.io/usage/training
我已经按照文档中提到的相同步骤来用我的数据更新NER模型。
def model_train(output_dir=None, n_iter=100):
"""Load the model, set up the pipeline and train the entity recognizer."""
model=('en_core_web_sm')
nlp = spacy.load(model, entity = False, parser = False) # load existing spaCy model
print("Loaded model '%s'" % model)
print (nlp.pipe_names)
# # create the built-in pipeline components and add them to the pipeline
ner = nlp.get_pipe("ner")
# # add labels
for texts, annotations in TRAIN_DATA:
for ent in annotations.get("entities"):
# print (ent)
ner.add_label(ent[2])
# print (ent[2])
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
with nlp.disable_pipes(*other_pipes): # only train NER
if model is None:
nlp.begin_training()
for itn in range(n_iter):
random.shuffle(TRAIN_DATA)
losses = {}
# # batch up the examples using spaCy's minibatch
batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
for batch in batches:
texts, annotations = zip(*batch)
nlp.update(
[texts], # batch of texts
[annotations], # batch of annotations
drop=0.5, # dropout - make it harder to memorise data
losses=losses,
)
print("Losses", losses)
nlp.to_disk(output_dir)
print("Saved model to", output_dir)
我得到的错误是
Loaded model 'en_core_web_sm'
['tagger', 'parser', 'ner']
-------------------------------------------------------------------------
- TypeError跟踪(最近一次通话) 在 ----> 1个model_train()
<ipython-input-337-91366511ed4d> in model_train(output_dir, n_iter)
56 [annotations], # batch of annotations
57 drop=0.5, # dropout - make it harder to
memorise data
---> 58 losses=losses,
59 )
60 print("Losses", losses)
C:\ProgramData\Anaconda3\lib\site-packages\spacy\language.py in
update(self, docs, golds, drop, sgd, losses, component_cfg)
432 doc = self.make_doc(doc)
433 if not isinstance(gold, GoldParse):
--> 434 gold = GoldParse(doc, **gold)
435 doc_objs.append(doc)
436 gold_objs.append(gold)
TypeError: type object argument after ** must be a mapping, not tuple
答案 0 :(得分:2)
zip()已发送文本列表和注释
nlp.update(
texts, # batch of texts
annotations, # batch of annotations
drop=0.5, # dropout - make it harder to memorise data
losses=losses,
)
点击here