当我通过Python脚本调用Spider时,它给了我一个ImportError
:
ImportError: No module named app.models
我的items.py
是这样的:
from scrapy.item import Item, Field
from scrapy.contrib.djangoitem import DjangoItem
from app.models import Person
class aqaqItem(DjangoItem):
django_model=Person
pass
我的settings.py
是这样的:
#
# For simplicity, this file contains only the most important settings by
# default. All the other settings are documented here:
#
# http://doc.scrapy.org/topics/settings.html
#
BOT_NAME = 'aqaq'
BOT_VERSION = '1.0'
SPIDER_MODULES = ['aqaq.spiders']
NEWSPIDER_MODULE = 'aqaq.spiders'
USER_AGENT = '%s/%s' % (BOT_NAME, BOT_VERSION)
ITEM_PIPELINES = [
'aqaq.pipelines.JsonWithEncodingPipeline']
import sys
import os
c=os.getcwd()
os.chdir("../../myweb")
d=os.getcwd()
os.chdir(c)
sys.path.insert(0, d)
# Setting up django's settings module name.
# This module is located at /home/rolando/projects/myweb/myweb/settings.py.
import os
os.environ['DJANGO_SETTINGS_MODULE'] = 'myweb.settings'
我调用蜘蛛的Python脚本是这样的:
from twisted.internet import reactor
from scrapy.crawler import Crawler
from scrapy import log, signals
from final.aqaq.aqaq.spiders.spider import aqaqspider
from scrapy.utils.project import get_project_settings
def stop_reactor():
reactor.stop()
spider = aqaqspider(domain='aqaq.com')
settings = get_project_settings()
crawler = Crawler(settings)
crawler.signals.connect(reactor.stop, signal=signals.spider_closed)
crawler.configure()
crawler.crawl(spider)
crawler.start()
log.start()
reactor.run()
我的目录结构如下:
.
|-- aqaq
| |-- aqaq
| | |-- call.py
| | |-- __init__.py
| | |-- __init__.pyc
| | |-- items.py
| | |-- items.pyc
| | |-- pipelines.py
| | |-- pipelines.pyc
| | |-- settings.py
| | |-- settings.pyc
| | `-- spiders
| | |-- aqaq.json
| | |-- __init__.py
| | |-- __init__.pyc
| | |-- item.json
| | |-- spider.py
| | |-- spider.pyc
| | `-- url
| |-- call.py
| |-- call_spider.py
| `-- scrapy.cfg
|-- mybot
| |-- mybot
| | |-- __init__.py
| | |-- items.py
| | |-- pipelines.py
| | |-- settings.py
| | `-- spiders
| | |-- example.py
| | `-- __init__.py
| `-- scrapy.cfg
`-- myweb
|-- app
| |-- admin.py
| |-- admin.pyc
| |-- __init__.py
| |-- __init__.pyc
| |-- models.py
| |-- models.pyc
| |-- tests.py
| `-- views.py
|-- manage.py
`-- myweb
|-- file
|-- __init__.py
|-- __init__.pyc
|-- settings.py
|-- settings.pyc
|-- urls.py
|-- urls.pyc
|-- wsgi.py
`-- wsgi.pyc
请帮助我,因为我是Scrapy的新手。
我真的很困惑 我尝试导入import os
os.environ['DJANGO_SETTINGS_MODULE'] = 'myweb.settings
在我的脚本中出现了最新的错误
get_project_settings is invalid
我的scarapy版本也是18
谢谢大家,我得到了解决方案
答案 0 :(得分:1)
也许您的问题是您在设置之前导入蜘蛛。 ImportError
可能来自您from app.models import Person
中的items.py
。
因此,在设置设置后导入蜘蛛:
crawler.configure()
from final.aqaq.aqaq.spiders.spider import aqaqspider
spider = aqaqspider(domain='aqaq.com')
crawler.crawl(spider)
答案 1 :(得分:0)
我前段时间在媒体上写过这篇文章,也许可以帮到你!
https://medium.com/@tiago.piovesan.tp/make-a-crawler-with-django-and-scrapy-a41adfdd24d9
这是库之间的集成配置: crawler/settings.py
import os
import sys
sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), ".."))
os.environ['DJANGO_SETTINGS_MODULE'] = 'myweb.settings'
import django
django.setup()