I want to use a CrawlSpider with rules so that every page gets crawled, but I can't get any of the data that should be extracted by the rule's callback, and I don't know why. Here is my spider:
import scrapy
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.selector import Selector
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from urlparse import urljoin
from delhivery.items import DelhiveryItem
class criticspider(CrawlSpider):
    name = "delh"
    allowed_domains = ["consumercomplaints.in"]
    start_urls = ["http://www.consumercomplaints.in/?search=delhivery&page=1"]

    rules = (
        # Extracting pages, allowing only links with page=number to be extracted
        Rule(SgmlLinkExtractor(restrict_xpaths=('//div[@class="pagelinks"]',), allow=('page=\d+',)), follow=True, callback='parse_item'),
    )

    def parse_item(self, response):
        item = DelhiveryItem()
        sites = response.xpath('//table[@width="100%"]')
        items = []
        for site in sites:
            item['title'] = response.xpath('.//td[@class="complaint"]/a/span/text()').extract()[0]
            # item['subtitle'] = response.xpath('<write xpath>').extract()[0]
            # item['date'] = response.xpath('<write xpath>').extract()[0].strip()
            # item['username'] = response.xpath('<write xpath>').extract()[0]
            # item['link'] = response.url
            # item['data'] = response.xpath('<write xpath>').extract()[0]
            items.append(item)
        return items
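For reference, this is roughly the shape I think the callback should have: each field read relative to the matched table (using `site.xpath` instead of `response.xpath`) and one item yielded per complaint. This is only a minimal sketch under the assumption that `DelhiveryItem` declares `title` and `link` fields; the XPaths for the other fields are still missing.

    def parse_item(self, response):
        # one <table width="100%"> per complaint entry (assumption)
        sites = response.xpath('//table[@width="100%"]')
        for site in sites:
            item = DelhiveryItem()
            # relative XPath, evaluated against this table only, not the whole page
            title = site.xpath('.//td[@class="complaint"]/a/span/text()').extract()
            if not title:
                continue  # skip tables that are not complaint entries
            item['title'] = title[0]
            item['link'] = response.url
            yield item

Is something along these lines correct, or is the problem in the rule itself?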