我在Windows 64位上使用Python.org版本2.7 64位。我有以下Scrapy代码:
from scrapy.spider import Spider
from scrapy.selector import Selector
from scrapy.utils.markup import remove_tags
from scrapy.cmdline import execute
import re
class MySpider(Spider):
name = "wiki"
allowed_domains = ["wiki.org"]
start_urls = ["http://en.wikipedia.org/wiki/Miroslav_Klose"]
def parse(self, response):
titles = response.selector.xpath("normalize-space(//title)")
for titles in titles:
body = response.xpath("//p").extract()
body2 = "".join(body)
print remove_tags(body2).encode('utf-8')
execute(['scrapy','crawl','wiki'])
这会在使用F5运行时按预期在Python IDLE中生成一个已删除的输出,但它也会生成以下Traceback:
Traceback (most recent call last):
File "C:\Python27\mrscrap\mrscrap\spiders\test.py", line 32, in <module>
execute(['scrapy','crawl','wiki'])
File "C:\Python27\lib\site-packages\scrapy\cmdline.py", line 143, in execute
_run_print_help(parser, _run_command, cmd, args, opts)
File "C:\Python27\lib\site-packages\scrapy\cmdline.py", line 89, in _run_print_help
func(*a, **kw)
File "C:\Python27\lib\site-packages\scrapy\cmdline.py", line 150, in _run_command
cmd.run(args, opts)
File "C:\Python27\lib\site-packages\scrapy\commands\crawl.py", line 57, in run
crawler = self.crawler_process.create_crawler()
File "C:\Python27\lib\site-packages\scrapy\crawler.py", line 87, in create_crawler
self.crawlers[name] = Crawler(self.settings)
File "C:\Python27\lib\site-packages\scrapy\crawler.py", line 25, in __init__
self.spiders = spman_cls.from_crawler(self)
File "C:\Python27\lib\site-packages\scrapy\spidermanager.py", line 35, in from_crawler
sm = cls.from_settings(crawler.settings)
File "C:\Python27\lib\site-packages\scrapy\spidermanager.py", line 31, in from_settings
return cls(settings.getlist('SPIDER_MODULES'))
File "C:\Python27\lib\site-packages\scrapy\spidermanager.py", line 22, in __init__
for module in walk_modules(name):
File "C:\Python27\lib\site-packages\scrapy\utils\misc.py", line 68, in walk_modules
submod = import_module(fullpath)
File "C:\Python27\lib\importlib\__init__.py", line 37, in import_module
__import__(name)
File "C:\Python27\mrscrap\mrscrap\spiders\test.py", line 32, in <module>
execute(['scrapy','crawl','wiki'])
File "C:\Python27\lib\site-packages\scrapy\cmdline.py", line 144, in execute
sys.exit(cmd.exitcode)
SystemExit: 0
我仍然是Python的新手,但我知道Traceback是由错误引起的,但在这种情况下,我不确定这是什么错误告诉我的。谁能告诉我错误是什么和/或如何修复它?
由于