我在此处抓取页面,但是每执行一次此代码,about_page 的结果就会重复输出 3 次。
如何消除这种重复?我只希望它输出一次 about_page 的结果。
import scrapy
class DmozSpiderSpider(scrapy.Spider):
    """Spider that follows the DMOZ "about" page and scrapes it into one item.

    ``parse`` handles the start URL and schedules a request for
    ``about_page``; ``parse_about`` handles that response and delegates the
    field extraction to ``find_items``.
    """

    name = 'Dmoz'
    start_urls = ['http://dmoz-odp.org/']
    about_page = 'http://dmoz-odp.org/docs/en/about.html'

    def parse(self, response):
        """Schedule the follow-up request(s) for the start page.

        The partially filled item dict is passed to the callback through
        ``meta['items']`` so the callback can add its fields to it.
        """
        items = {'About_page': self.about_page}
        # (url, callback) pairs to follow from the start page.
        dct = [(self.about_page, self.parse_about)]
        for page, callback in dct:
            yield response.follow(page, callback, meta={'items': items})

    def find_items(self, response, names, finder):
        """Fill the item carried in ``response.meta`` and yield it once.

        Args:
            response: Response whose ``meta['items']`` holds the item dict
                and whose body is queried with the CSS selectors.
            names: Mapping whose values are the item field names.
            finder: Mapping whose values are the CSS selectors; values are
                paired with ``names`` values positionally, so both mappings
                must list their entries in the same order.

        Yields:
            The item dict, exactly once, after every field has been set.
        """
        items = response.meta['items']
        for name, find in zip(names.values(), finder.values()):
            items[name] = response.css(find).extract()
        # Yield after the loop: yielding inside it emitted the same item
        # once per field (three times for the about page).
        yield items

    def parse_about(self, response):
        """Extract headings, paragraphs and project names from the about page."""
        names = {
            'name1': 'Headings',
            'name2': 'Paragraphs',
            'name3': '3 Projects',
        }
        finder = {
            'find1': 'h2::text , #mainContent h1::text',
            'find2': 'p::text',
            'find3': 'li~ li+ li b a::text , li:nth-child(1) b a::text',
        }
        yield from self.find_items(response, names, finder)
答案 0 :(得分:0)
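修复缩进:`yield items` 应该放在 `for` 循环之外,这样在所有字段都填充完毕之后只会产出一次:

    def find_items(self, response, names, finder):
        items = response.meta['items']
        for name, find in zip(names.values(), finder.values()):
            items[name] = response.css(find).extract()
        yield items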