如何反复下载并解析同一个 URL,直到获取到所需的数据,或者重试次数超过 5 次?
例如,我需要从 example.com 获取 h1,但服务器返回的页面并不固定:有时包含 h1,有时不包含。我希望代码重复下载并解析该网址,直到获取到 h1,或者重试次数超过 5 次为止。
示例代码如下:
import re
from kupito.items import KupitoItem
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from scrapy.selector import Selector
class idpkzSpider(CrawlSpider):
    """Crawl the idp.kz monitor catalogue and emit one item per product.

    Pagination links (URLs ending in ``start=<number>``) are followed and
    handled by :meth:`parse_start_url`, which is also what ``CrawlSpider``
    calls for the start URLs.  A page that comes back without an ``<h1>``
    is downloaded again, up to ``max_h1_retries`` times, before it is
    given up on.
    """

    name = 'idp.kz'
    allowed_domains = ['idp.kz']
    start_urls = [
        'http://idp.kz/index.php/katalog-tovarov/monitors'
    ]

    # Upper bound on re-downloads of a page that was served without an <h1>.
    max_h1_retries = 5

    rules = (
        Rule(
            # Raw string: '\d' in a plain literal is an invalid escape sequence.
            LinkExtractor(allow=r'.*start=\d+$'),
            callback='parse_start_url',
            follow=True,
        ),
    )

    def parse_start_url(self, response):
        """Extract the products listed on a catalogue page.

        Returns a list.  It holds one ``KupitoItem`` per product that has a
        name, a price and a URL.  When the page has no ``<h1>``, it holds a
        single retry request for the same URL instead, or nothing once the
        retry budget is exhausted.
        """
        hxs = Selector(response)

        dirEl = hxs.xpath('//h1/text()').extract()
        if not dirEl:
            retry = self._retry_without_heading(response)
            return [] if retry is None else [retry]

        goods = hxs.xpath(
            '//div[@class=\'jshop list_product\']/div[@class=\'block_product\']'
        )

        # NOTE(review): .encode('utf-8') is kept from the original; on
        # Python 3 it stores bytes in the item -- confirm the pipeline
        # expects that.
        dirName = dirEl[0].encode('utf-8').strip()

        items = []
        for good in goods:
            name = good.xpath('div//div[@class=\'name\']/a/text()').extract()
            price = good.xpath('div//div[@class=\'jshop_price\']/span/text()').extract()
            url = good.xpath('div//div[@class=\'name\']/a/@href').extract()
            if not (name and price and url):
                # Skip product blocks that are missing any required field.
                continue

            item = KupitoItem()
            item['name'] = name[0].encode('utf-8').strip()
            item['price'] = price[0].encode('utf-8').strip()
            item['url'] = url[0].encode('utf-8').strip()
            item['dirName'] = dirName
            items.append(item)
        return items

    def _retry_without_heading(self, response):
        """Return a request that re-downloads ``response.url``, or ``None``.

        The number of attempts made so far travels in the request meta under
        ``'h1_retries'``; ``None`` is returned once it reaches
        ``max_h1_retries``.
        """
        attempts = response.meta.get('h1_retries', 0)
        if attempts >= self.max_h1_retries:
            self.logger.warning(
                'No <h1> on %s after %d retries, giving up',
                response.url, attempts,
            )
            return None

        meta = dict(response.meta)
        meta['h1_retries'] = attempts + 1
        # dont_filter=True: the duplicate filter would otherwise drop a
        # request for a URL that has already been seen.  replace() keeps the
        # original callback, so the retry is parsed the same way.
        return response.request.replace(meta=meta, dont_filter=True)