我有一个爬虫蜘蛛，它在两个 for 循环中产生请求。是否有办法根据 yield 请求的回调结果中断 for 循环？（即，当 parse_results 中变量 x 为真、抛出 KeyError 时中断。）
def parse(self, response):
    """Schedule one search request per (query, offset) pair.

    Yields a ``scrapy.Request`` for every query in ``all_queries`` at each
    pagination offset (0, 20, ..., 10000); responses are handled by
    :meth:`parse_results`.
    """
    # code here
    offsets = range(0, 10001, 20)  # pagination step of 20, inclusive of 10000
    for query in all_queries:
        for offset in offsets:
            # BUG FIX: the original did `query = query + f'&offset=...'`,
            # reassigning the loop variable so each iteration appended yet
            # another &offset=... onto the previously built URL. Build the
            # paginated URL in a fresh variable instead.
            url = query + f'&offset={offset}'
            yield scrapy.Request(url=url, callback=self.parse_results)
            # NOTE(review): you cannot `break` here based on the callback's
            # outcome — requests are scheduled asynchronously and
            # parse_results runs later. To stop paginating when the alert
            # bar appears, chain requests instead: yield only offset 0 here
            # and have parse_results yield the next-offset request (carrying
            # state via response.meta / cb_kwargs), or raise
            # scrapy.exceptions.CloseSpider from the callback.
def parse_results(self, response):
    """Extract user items from a search-results page into ``self.found_items``.

    NOTE(review): rows are scraped only when the ``#alert_bar`` element is
    present — this preserves the original raise-and-catch-KeyError flow,
    but it looks inverted (an alert bar usually signals *no* results);
    confirm against the target site before relying on it.
    """
    # The original raised KeyError and immediately caught it in the same
    # try block purely as control flow; that round-trip is equivalent to a
    # plain `if` test.
    alert = response.xpath('//*[@id="alert_bar"]').extract_first()
    if alert:
        rows = response.xpath('//*[@id="sRes"]/div[@class="sResCont"]')
        for row in rows:
            # Skip advertisement frames embedded among the result rows.
            if row.xpath('div[@class="adFrameCnt"]').extract_first():
                continue
            item = UserItem()  # scrapy item
            item['username'] = row.xpath('div/div[@class="sResMain"]/b/a/text()').extract_first()
            item['link'] = response.urljoin(row.xpath('div/div[@class="sResMain"]/b/a/@href').extract_first())
            self.found_items.append(item)