我正在使用 Python 的 Scrapy 来抓取 Reddit。但是我发现 Scrapy 得到的响应 HTML 中没有“下一页”(next page)按钮,所以我的 Spider 代码无法翻到下一页。
这是由 Reddit 引起的,还是我使用 Scrapy 的方式有什么问题?下面是我目前的 Python 代码:
class RedditNetflixSpider(scrapy.Spider):
    """Spider that extracts title, vote and comment-count text from r/movies.

    Each yielded item is a dict with the keys ``'title'``, ``'vote'`` and
    ``'comments'``, built by pairing up the results of three independent
    CSS queries against the listing page.
    """

    name = 'reddit_netflix'
    # Scrapy expects bare domain names here, not URLs or paths. An entry
    # containing a path never matches a request's host, so every followed
    # link (for example a next-page request) would be filtered as offsite.
    allowed_domains = ['www.reddit.com']
    start_urls = ['https://www.reddit.com/r/movies/']

    def parse(self, response):
        """Yield one dict per post found in ``response``.

        The three selector results are combined positionally with ``zip``,
        so the output is truncated to the shortest of the three lists.
        """
        titles = response.css('h3::text').extract()
        votes = response.css(
            '.voteButton[aria-label="upvote"] + div::text'
        ).extract()
        # Keep every other match only.
        # NOTE(review): presumably the vote count is rendered twice per
        # post in the markup -- confirm against the live page.
        odd_votes = votes[::2]
        comments = response.css('a[rel="nofollow"] span::text').extract()

        for title, vote, comment_text in zip(titles, odd_votes, comments):
            scraped_info = {
                'title': title,
                'vote': vote,
                # Drop the literal " comments" suffix and surrounding
                # whitespace, leaving just the count text.
                'comments': comment_text.replace(' comments', '').strip(),
            }
            # The original yielded the undefined name `scraped_inf`,
            # which raises NameError on the first item.
            yield scraped_info