在 start_requests 方法中,可以修改 start_urls 对应请求的 cookie。但我还定义了一些规则(rules),蜘蛛会按照这些规则提取链接并抓取页面。那么,当蜘蛛按规则抓取页面时,该如何修改这些请求的 cookie?
class MoiveSpider(CrawlSpider):
    """Crawl the Douban Top 250 listing and the movie pages it links to.

    Every outgoing request receives a freshly generated random ``bid``
    cookie: the start requests get it through ``request()``, and the
    requests that ``CrawlSpider`` builds from ``rules`` get it through the
    ``process_request`` hook ``_set_bid_cookie``.
    """

    name = "movieTop250"
    allowed_domains = ["movie.douban.com"]
    start_urls = ["https://movie.douban.com/top250"]
    # Copied into each request's ``dont_merge_cookies`` meta key.
    is_forbiden = False
    rules = [
        # Listing pagination links: no callback, only the cookie hook.
        Rule(
            LinkExtractor(
                allow=r'https://movie\.douban\.com/top250\?start=\d+.*'),
            process_request="_set_bid_cookie",
        ),
        # Individual movie pages: handled by ``parse_item``.
        Rule(
            LinkExtractor(allow=r'https://movie\.douban\.com/subject/\d+'),
            callback="parse_item",
            process_request="_set_bid_cookie",
        ),
    ]

    def _random_bid(self):
        """Return 11 distinct random ASCII letters/digits as a ``bid`` value."""
        return "".join(
            random.sample(string.ascii_letters + string.digits, 11))

    def _set_bid_cookie(self, request, response=None):
        """Attach a random ``bid`` cookie to ``request`` and return it.

        Used as the ``process_request`` hook of every rule so that
        link-extracted requests are treated the same way as start requests.

        Args:
            request: The request to modify in place.
            response: The response the request was extracted from. Unused;
                it defaults to ``None`` so the hook works whether it is
                called with one argument or with two.

        Returns:
            The same ``request`` object.
        """
        # The cookie must be keyed by its real name, ``bid``; storing the
        # string "bid=..." under the key "cookie" would send a cookie that
        # is literally called "cookie".
        request.cookies["bid"] = self._random_bid()
        # NOTE(review): Scrapy documents ``dont_merge_cookies`` as disabling
        # cookie handling for the request -- confirm that the cookie set
        # above is still expected to be sent when ``is_forbiden`` is true.
        request.meta["dont_merge_cookies"] = self.is_forbiden
        return request

    def request(self, url):
        """Build a request for ``url`` carrying a random ``bid`` cookie."""
        return self._set_bid_cookie(scrapy.Request(url=url))

    def start_requests(self):
        """Yield one cookie-carrying request per entry in ``start_urls``."""
        for url in self.start_urls:
            yield self.request(url)

    def parse_item(self, response):
        """Return the result of ``parse_movie_item`` for a movie page.

        ``parse_movie_item`` is defined outside this class.
        """
        return parse_movie_item(self, response)