我在 Ubuntu 14.04.4 LTS x64 上使用 Python 3.5 和 spaCy 1.6.0。要安装 spaCy 的英文模型,我试着运行 python3.5 -m spacy.en.download:
这给了我错误信息:
ubun@ner-3:~/NeuroNER-master/src$ python3.5 -m spacy.en.download
Downloading parsing model
Traceback (most recent call last):
File "/usr/lib/python3.5/runpy.py", line 184, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.5/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.5/dist-packages/spacy/en/download.py", line 25, in <module>
plac.call(main)
File "/usr/local/lib/python3.5/dist-packages/plac_core.py", line 328, in call
cmd, result = parser.consume(arglist)
File "/usr/local/lib/python3.5/dist-packages/plac_core.py", line 207, in consume
return cmd, self.func(*(args + varargs + extraopts), **kwargs)
File "/usr/local/lib/python3.5/dist-packages/spacy/en/download.py", line 18, in main
download('en', force=False, data_path=data_path)
File "/usr/local/lib/python3.5/dist-packages/spacy/download.py", line 25, in download
about.__models__.get(lang, lang), data_path)
File "/usr/local/lib/python3.5/dist-packages/sputnik/__init__.py", line 159, in package
pool = Pool(app_name, app_version, expand_path(data_path))
File "/usr/local/lib/python3.5/dist-packages/sputnik/pool.py", line 19, in __init__
super(Pool, self).__init__(app_name, app_version, path, **kwargs)
File "/usr/local/lib/python3.5/dist-packages/sputnik/package_list.py", line 33, in __init__
self.load()
File "/usr/local/lib/python3.5/dist-packages/sputnik/package_list.py", line 51, in load
for package in self.packages():
File "/usr/local/lib/python3.5/dist-packages/sputnik/package_list.py", line 47, in packages
yield self.__class__.package_class(path=os.path.join(self.path, path))
File "/usr/local/lib/python3.5/dist-packages/sputnik/package.py", line 15, in __init__
super(Package, self).__init__(defaults=meta['package'])
KeyError: 'package'
ubun@ner-3:~/NeuroNER-master/src$
问题是什么?
pip3 freeze 的输出是:
ubun@ner-3:~/NeuroNER-master/src$ pip3 freeze
appdirs==1.4.3
cloudpickle==0.2.2
command-not-found==0.3
cycler==0.10.0
cymem==1.31.2
cytoolz==0.8.2
decorator==4.0.11
dill==0.2.6
en-core-web-sm==1.2.0
flexmock==0.10.2
language-selector==0.1
matplotlib==2.0.0
murmurhash==0.26.4
networkx==1.11
numpy==1.12.1
packaging==16.8
pathlib==1.0.1
plac==0.9.6
preshed==0.46.4
protobuf==3.2.0
pycurl==7.19.3
pygobject==3.12.0
pyparsing==2.2.0
python-apt===0.9.3.5ubuntu2
python-dateutil==2.6.0
pytz==2016.10
requests==2.13.0
scikit-learn==0.18.1
scipy==0.19.0
semver==2.7.6
six==1.10.0
spacy==1.6.0
sputnik==0.9.3
tensorflow==1.0.1
termcolor==1.1.0
thinc==6.2.0
toolz==0.8.2
tqdm==4.11.2
ufw===0.34-rc-0ubuntu2
ujson==1.35
unattended-upgrades==0.1
wrapt==1.10.10
yolk==0.4.3
我对spaCy 1.5.0也有同样的问题。 spacy-1.7.2没有这个问题。
答案 0 :(得分:2)
这是因为 sputnik 软件包自 spaCy 1.5 之后的版本起已被弃用。
最好的办法是将Spacy升级到最新版本。或者至少到1.7 =)
否则,您可以尝试:
pip3 install https://github.com/explosion/spaCy/releases/download/v1.6.0/en-1.1.0.tar.gz
但请注意,如果已经安装了新版的 spaCy 模型,这可能会破坏你的 Python 环境。请记住使用虚拟环境,尤其是在使用旧版本的库时!
此外,这还取决于 spaCy 1.6 本身能否正确安装 =(
请参阅 https://github.com/explosion/spaCy/issues/711 和 https://github.com/explosion/spaCy/releases/tag/v1.6.0
查看 https://pypi.python.org/pypi/sputnik 中 sputnik/package.py 的代码:
import os
import logging
from . import util
from . import default
from .package_stub import PackageStub
class NotIncludedException(Exception):
    """Raised when a requested file is not part of a package."""
class Package(PackageStub):  # installed package
    """An installed package located in a directory on disk.

    The metadata file named by ``default.META_FILENAME`` is read from the
    package directory; its ``'package'`` entry is handed to ``PackageStub``
    as defaults and its ``'manifest'`` entry lists the files the package
    contains.
    """

    def __init__(self, path):
        """Load the package metadata found under *path*.

        Args:
            path: directory of the installed package.

        Raises:
            KeyError: if the loaded metadata has no ``'package'`` entry.
        """
        meta = util.json_load(os.path.join(path, default.META_FILENAME))
        super(Package, self).__init__(defaults=meta['package'])
        self.logger = logging.getLogger(__name__)
        self.meta = meta
        self.path = path

    @property
    def manifest(self):
        """The ``'manifest'`` entry of the loaded metadata."""
        return self.meta['manifest']

    def has_file(self, *path_parts):
        """Return True if a manifest entry's ``'path'`` equals *path_parts*."""
        # Test the predicate itself rather than the truthiness of the entry.
        return any(tuple(entry['path']) == path_parts
                   for entry in self.manifest)

    def file_path(self, *path_parts):
        """Return the location of a packaged file below the package directory.

        Raises:
            NotIncludedException: if the manifest does not list the file.
        """
        # NOTE(review): util.get_path presumably joins the parts into a
        # relative path -- confirm against sputnik/util.py.
        path = util.get_path(*path_parts)
        if not self.has_file(*path_parts):
            raise NotIncludedException('package does not include file: %s' % path)
        return os.path.join(self.path, path)

    def dir_path(self, *path_parts):
        """Return the location of a directory below the package directory.

        Unlike ``file_path``, no manifest check is performed.
        """
        # TODO check whether path is part of package
        path = util.get_path(*path_parts)
        return os.path.join(self.path, path)
看这几行:
from . import default
meta = util.json_load(os.path.join(path, default.META_FILENAME))
super(Package, self).__init__(defaults=meta['package'])
我们看到 meta['package'] 依赖于 sputnik/default.py 中的配置,即:
# cli/param defaults
# NOTE(review): presumably the default values for sputnik's command-line
# options and function parameters -- confirm against the callers.
find_package_string = ''
find_meta = False
find_cache = False
search_string = ''
build_package_path = '.'
repository_url = 'https://index.spacy.io'
purge_cache = False
purge_pool = False
# misc
CHUNK_SIZE = 1024 * 16  # 16 KiB
ARCHIVE_FILENAME = 'archive.gz'
# File name that Package.__init__ (sputnik/package.py) joins onto the package
# directory to load the package metadata.
META_FILENAME = 'meta.json'
COMPRESSLEVEL = 9
COOKIES_FILENAME = 'cookies.txt'
CACHE_DIRNAME = '__cache__'
其中 META_FILENAME 即 meta.json,它对应 https://index.spacy.io/ 上的 JSON:
{
"de-1.0.0": [
"/models/de-1.0.0/meta.json",
"707615c7822e5fdba0c9047d7c864f48"
],
"en-1.1.0": [
"/models/en-1.1.0/meta.json",
"7d928b8171ece380c29285d8e1bf7879"
],
"en_glove_cc_300_1m_vectors-1.0.0": [
"/models/en_glove_cc_300_1m_vectors-1.0.0/meta.json",
"390182610e60ada31bd1d78408b86ada"
]
}
如果我们顺着线索查看 https://index.spacy.io/models/en-1.1.0/meta.json,会看到:
{
"archive": [
"archive.gz",
"84cc5c9869bfdc09072bb8d217d30c53"
],
"etag": "cd1ba4eed97115f409caf42209b503f3",
"manifest": [
{
"checksum": [
"md5",
"6d0d4b6ab1c63bae1f643d74be45b58a"
],
"noffset": 81,
"path": [
"tokenizer",
"prefix.txt"
],
"size": 58
},
{
"checksum": [
"md5",
"0653ca64d24e3772ca226c0043a54d28"
],
"noffset": 203,
"path": [
"tokenizer",
"suffix.txt"
],
"size": 121
},
{
"checksum": [
"md5",
"b0e952a69870469e2c24a06a63b7b8b3"
],
"noffset": 4766,
"path": [
"tokenizer",
"specials.json"
],
"size": 57389
},
{
"checksum": [
"md5",
"f19ca88b84e10c13ce184587f23b291d"
],
"noffset": 4852,
"path": [
"tokenizer",
"infix.txt"
],
"size": 132
},
{
"checksum": [
"md5",
"43260460e916738695dca5ea58c25634"
],
"noffset": 5466,
"path": [
"tokenizer",
"morphs.json"
],
"size": 5456
},
{
"checksum": [
"md5",
"011a72e32df2c3c87817721c903cbb33"
],
"noffset": 6023,
"path": [
"vocab",
"gazetteer.json"
],
"size": 2744
},
{
"checksum": [
"md5",
"a5be0ac5dc3d9e07e5af33db25f2df1c"
],
"noffset": 31023404,
"path": [
"vocab",
"lexemes.bin"
],
"size": 83042240
},
{
"checksum": [
"md5",
"aef38bcb805c2ed4edf17ab9b208369e"
],
"noffset": 31024046,
"path": [
"vocab",
"tag_map.json"
],
"size": 2557
},
{
"checksum": [
"md5",
"39728b8675762177066dd16162baaf5c"
],
"noffset": 31024084,
"path": [
"vocab",
"oov_prob"
],
"size": 10
},
{
"checksum": [
"md5",
"a336ae975fbe608c72b5727610445c2e"
],
"noffset": 226419131,
"path": [
"vocab",
"vec.bin"
],
"size": 211519189
},
{
"checksum": [
"md5",
"24a5c128601ffc987b8aff10c8f8acff"
],
"noffset": 226419335,
"path": [
"vocab",
"lemma_rules.json"
],
"size": 633
},
{
"checksum": [
"md5",
"b0f18c32ef9d83b8214db66f516900b2"
],
"noffset": 235404066,
"path": [
"vocab",
"strings.json"
],
"size": 18811305
},
{
"checksum": [
"md5",
"5ead864c56cce491889180b161ae43a6"
],
"noffset": 235452331,
"path": [
"vocab",
"serializer.json"
],
"size": 190524
},
{
"checksum": [
"md5",
"cc7c42f987cb1c38ec80f5fb1e7f2e93"
],
"noffset": 243140134,
"path": [
"pos",
"model"
],
"size": 11799888
},
{
"checksum": [
"md5",
"00613ddd9d320b7a26cef788919cae7e"
],
"noffset": 266495675,
"path": [
"ner",
"model"
],
"size": 36553844
},
{
"checksum": [
"md5",
"5e6e9afbd65d1d13b9b6b3bb709694e0"
],
"noffset": 266495905,
"path": [
"ner",
"config.json"
],
"size": 1237
},
{
"checksum": [
"md5",
"f37b1a7e8ccaddb5a36d093ae6511052"
],
"noffset": 556251621,
"path": [
"deps",
"model"
],
"size": 444221600
},
{
"checksum": [
"md5",
"d4a5246448e378f1f211fd93bfa4d344"
],
"noffset": 556251964,
"path": [
"deps",
"config.json"
],
"size": 1450
},
{
"checksum": [
"md5",
"bb55705666a12253d15e332329e2b1f0"
],
"noffset": 556490251,
"path": [
"wordnet",
"index.adj"
],
"size": 824127
},
{
"checksum": [
"md5",
"f6e4bd2b3473a5e40a749719c2268846"
],
"noffset": 556508918,
"path": [
"wordnet",
"sentidx.vrb"
],
"size": 73166
},
{
"checksum": [
"md5",
"ef3e1c35234edb8d7394c75f4b344c70"
],
"noffset": 556514986,
"path": [
"wordnet",
"adj.exc"
],
"size": 23019
},
{
"checksum": [
"md5",
"191515ffba85d4461d37f93059de2840"
],
"noffset": 556516925,
"path": [
"wordnet",
"sents.vrb"
],
"size": 5319
},
{
"checksum": [
"md5",
"fa5c7d42ec3214777011eabd13f34bc9"
],
"noffset": 556517242,
"path": [
"wordnet",
"frames.vrb"
],
"size": 1125
},
{
"checksum": [
"md5",
"8c949e6ef352295997b09e2446364e43"
],
"noffset": 557891009,
"path": [
"wordnet",
"index.noun"
],
"size": 4786655
},
{
"checksum": [
"md5",
"fa5c7d42ec3214777011eabd13f34bc9"
],
"noffset": 557891326,
"path": [
"wordnet",
"verb.Framestext"
],
"size": 1125
},
{
"checksum": [
"md5",
"98636a3c14d26002264d352ea57d713a"
],
"noffset": 558062212,
"path": [
"wordnet",
"index.verb"
],
"size": 523980
},
{
"checksum": [
"md5",
"951700d36c2c84a20fda9550028dc7cc"
],
"noffset": 558075491,
"path": [
"wordnet",
"noun.exc"
],
"size": 38301
},
{
"checksum": [
"md5",
"d8016b74fcb68ef5139a4c51d22bdbdf"
],
"noffset": 558086414,
"path": [
"wordnet",
"verb.exc"
],
"size": 38033
},
{
"checksum": [
"md5",
"a55bf29bc2f59e33ea31568874f6a294"
],
"noffset": 558132762,
"path": [
"wordnet",
"index.adv"
],
"size": 162816
},
{
"checksum": [
"md5",
"c0d9112ae92a3ce3a149541c16c0386a"
],
"noffset": 558132844,
"path": [
"wordnet",
"adv.exc"
],
"size": 85
}
],
"package": {
"compatibility": {
"spacy": null
},
"description": "default English model",
"license": "public domain",
"name": "en",
"version": "1.1.0"
}
}