# Question: I cannot crawl through to the next page; the problem seems to be with the
# SgmlLinkExtractor rule. Please help me figure out where I went wrong.
import scrapy
from product.items import ProductItem
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
class BurlingtoncoatfactoryDSpider(CrawlSpider):
    """Crawl Burlington Coat Factory search results for 'south shore furnitures'
    and yield one ProductItem per page with detail URLs, rating classes and
    review-count texts.

    Fixes versus the broken original:
    - allowed_domains held a full URL; the offsite middleware expects bare
      domain names and was silently dropping every followed request.
    - parse() was overridden; CrawlSpider uses parse() internally to apply
      its rules, so overriding it disabled rule-based pagination entirely.
      The callback is renamed to parse_item.
    - The allow pattern used unescaped '+' (a regex quantifier) and matched
      on the '#pg=N' fragment, which browsers never send and link extractors
      strip — so the rule could never match a pagination link.
    """
    name = "burlingtoncoatfactory_d"
    # Bare domain only — no scheme — or OffsiteMiddleware filters everything.
    allowed_domains = ["www.burlingtoncoatfactory.com"]
    start_urls = ['http://www.burlingtoncoatfactory.com/catalog/searchresults.aspx?filter=&search=south+shore+furnitures&viewMode=FortyEight#pg=1']
    rules = (
        # Raw string, '.'/'?'/'+' escaped; no '#pg=' fragment (client-side only,
        # never present in the hrefs the extractor sees).
        Rule(SgmlLinkExtractor(allow=(r'catalog/searchresults\.aspx\?filter=&search=south\+shore\+furnitures&viewMode=FortyEight',)),
             callback='parse_item', follow=True),
    )

    def parse_item(self, response):
        """Extract product lists from one search-results page.

        :param response: the downloaded search-results page.
        :yields: a single ProductItem whose fields are parallel lists.
        """
        item = ProductItem()
        # .extract() already returns a list of strings; no wrapper needed.
        item['product_DetailUrl'] = response.xpath('//div[@id="hawkitemlist"]//div[@class="row clearfix"]//div[@class="product-image"]/a/@href').extract()
        item['product_Rating'] = response.xpath('//*//div[@class="product-inner clearfix"]/div[@class="review"]/span[1]/@class').extract()
        item['product_NumberOfReviews'] = response.xpath('//*//div[@class="product-inner clearfix"]/div[@class="review"]/span[2]/text()').extract()
        yield item