from __future__ import unicode_literals
from __future__ import print_function
import plac
from pathlib import Path
from spacy.vocab import Vocab
from spacy.tagger import Tagger
from spacy.tokens import Doc
from spacy.gold import GoldParse
import random
TAG_MAP = {
'N': {"pos": "NOUN"},
'V': {"pos": "VERB"},
'J': {"pos": "ADJ"}
}
# Usually you'll read this in, of course. Data formats vary.
# Ensure your strings are unicode.
DATA = [
(
["ids", "id's", "invoiceid", "invoice_id", "Ray"],
["N", "N", "N", "N", "N"]
)
]
def ensure_dir(path):
if not path.exists():
path.mkdir()
def main(output_dir=None):
output_dir = '/home/Ray/Tagger'
if output_dir is not None:
output_dir = Path(output_dir)
ensure_dir(output_dir)
ensure_dir(output_dir / "pos")
ensure_dir(output_dir / "vocab")
vocab = Vocab(tag_map=TAG_MAP)
tagger = Tagger(vocab)
for i in range(25):
for words, tags in DATA:
doc = Doc(vocab, words=words)
gold = GoldParse(doc, tags=tags)
tagger.update(doc, gold)
random.shuffle(DATA)
tagger.model.end_training()
doc = Doc(vocab, orths_and_spaces=zip(
["ID", "Id", "iD", "IDs", "id", "ids", "id's", "ID's", "invoice id", "inv id", "ray"], [True] * 11))
tagger(doc)
for word in doc:
print(word.text, word.tag_, word.pos_)
if output_dir is not None:
tagger.model.dump(str(output_dir / 'pos' / 'model'))
with (output_dir / 'vocab' / 'strings.json').open('w') as file_:
tagger.vocab.strings.dump(file_)
if __name__ == '__main__':
plac.call(main)
输出返回以下错误: -
ID N NOUN
Id N NOUN
iD N NOUN
IDs N NOUN
id N NOUN
ids N NOUN
id's N NOUN
ID's N NOUN
invoice id N NOUN
inv id N NOUN
ray N NOUN
Traceback (most recent call last):
File "/home/Ray/spaCy-2.0.0-alpha/examples/training/train_tagger.py", line 75, in <module>
plac.call(main)
File "/usr/local/lib/python2.7/dist-packages/plac_core.py", line 328, in call
cmd, result = parser.consume(arglist)
File "/usr/local/lib/python2.7/dist-packages/plac_core.py", line 207, in consume
return cmd, self.func(*(args + varargs + extraopts), **kwargs)
File "/home/Ray/spaCy-2.0.0-alpha/examples/training/train_tagger.py", line 71, in main
tagger.vocab.strings.dump(file_)
AttributeError: 'spacy.strings.StringStore' object has no attribute 'dump'
Process finished with exit code 1
当我没有给出路径(即:Output_dir ='/ path')时,它工作正常。 如果我不提供任何路径,我将如何找到生成'pos'和'vocab'文件夹的位置。请帮忙
答案 0 :(得分:1)
将StringStore.dump替换为StringStore.to_disk和StringStore.to_bytes。 这已在新文档中更新。
答案 1 :(得分:0)
在spacy v2中,所有序列化方法都已更新为使用一致的API
所以StringStore.dump已被StringStore.to_disk和StringStore.to_bytes取代。
我认为这应该有用。