My source code is as follows:
# Spider
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector

from qna_crawler.items import QnaItem  # item module path assumed from the project layout in the traceback


class test_crawler(BaseSpider):
    name = 'test'
    allowed_domains = ['http://test.com']
    start_urls = ['http://test.com/test']

    def parse(self, response):
        hxs = HtmlXPathSelector(response)
        question_info = hxs.select('//div[contains(@class, "detail")]')
        answer_info = hxs.select('//div[contains(@class, "doctor_ans")]')
        row_for_question = question_info.select('table/tr/td')

        qna = QnaItem()
        qna['title'] = question_info.select('h2/text()').extract()
        qna['category'] = row_for_question[3].select('a/text()').extract()
        qna['question'] = row_for_question[7].select('text()').extract()
        qna['answer'] = answer_info.select('p[contains(@class,"MsoNormal")]/span/span/span/font/text()').extract()
        return qna
# Pipeline
from scrapy.xlib.pydispatch import dispatcher
from scrapy import signals
from scrapy.contrib.exporter import XmlItemExporter


class XmlExportPipeline(object):

    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.files = {}

    def spider_opened(self, spider):
        file = open('%s_products.xml' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = XmlItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
When I run it in the Scrapy shell (scrapy shell http://test.com/test), it works fine and I do not get any errors. However, when I run the command "scrapy crawl test", I get the following error:
Traceback (most recent call last):
  File "C:\Python27\lib\site-packages\twisted\internet\base.py", line 1178, in mainLoop
    self.runUntilCurrent()
  File "C:\Python27\lib\site-packages\twisted\internet\base.py", line 800, in runUntilCurrent
    call.func(*call.args, **call.kw)
  File "C:\Python27\lib\site-packages\twisted\internet\defer.py", line 368, in callback
    self._startRunCallbacks(result)
  File "C:\Python27\lib\site-packages\twisted\internet\defer.py", line 464, in _startRunCallbacks
    self._runCallbacks()
--- <exception caught here> ---
  File "C:\Python27\lib\site-packages\twisted\internet\defer.py", line 551, in _runCallbacks
    current.result = callback(current.result, *args, **kw)
  File "E:\Projects\tysk-osqa\osqa\scrapy\qna_crawler\spiders\qna.py", line 14, in parse
    question_info = HtmlXPathSelector(response).select('//div[contains(@class, "detail")]')
  File "C:\Python27\lib\site-packages\scrapy-0.14.4-py2.7.egg\scrapy\selector\dummysel.py", line 16, in _raise
    raise RuntimeError("No selectors backend available. " \
exceptions.RuntimeError: No selectors backend available. Please install libxml2 or lxml
That is not the case, because I have already installed libxml2 and lxml. I downloaded and installed the binary packages (64-bit) from http://www.lfd.uci.edu/~gohlke/pythonlibs/, and I can successfully import both lxml and libxml2 from cmd.
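For reference, a minimal diagnostic sketch (not part of the original post) that checks, from the same cmd prompt, whether the interpreter itself is 32- or 64-bit and whether lxml actually imports under it:

import struct
import sys

# A pointer is 4 bytes on a 32-bit Python and 8 bytes on a 64-bit Python.
print("Python %s" % sys.version)
print("interpreter bitness: %d-bit" % (struct.calcsize("P") * 8))

try:
    import lxml.etree
    print("lxml imports fine; libxml2 version it was built against: %s" % (lxml.etree.LIBXML_VERSION,))
except ImportError as exc:
    print("lxml is not importable from this interpreter: %s" % exc)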
Please help me solve this problem.
Thank you very much.
Answer 0 (score: 0)
You need to install the 32-bit versions of libxml2 and lxml.
Also note that when you install Windows binaries, they are installed only for the system Python (the one found in the registry).
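That registry point can be checked directly. The following is an illustrative sketch (not from the answer) that prints which interpreter is actually running and where it looks for packages; running it from the same prompt used for scrapy crawl test shows whether that command resolves to the same installation that the lxml installer targeted:

import sys

# If "scrapy crawl" is launched by a different Python than the one the lxml
# installer registered itself against, the selector backend will be missing
# even though "import lxml" works in an interactive session elsewhere.
print("running under: %s" % sys.executable)
print("package search path:")
for entry in sys.path:
    print("  %s" % entry)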
Answer 1 (score: -1)
I think you have not set up a virtualenv in which to install libxml2 and lxml.
Try: pip install lxml
and add lxml to your requirements.txt.
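If you go the virtualenv route, one way to confirm that the active environment resolves its own copy of lxml rather than a global install (a sketch under that assumption, not part of the answer):

import lxml
import lxml.etree

# __file__ should point into the virtualenv's site-packages directory,
# not into the global C:\Python27 installation.
print("lxml package location: %s" % lxml.__file__)
print("lxml version: %s" % lxml.etree.__version__)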