Python Splash:抓取使用 JS“下一页”按钮翻页的网站

时间:2020-05-22 11:09:04

标签: javascript python scrapy splash-screen

我的目标是单击页面上的“下一页”按钮。主要问题是:该按钮没有对应的 URL,只绑定了 JavaScript 事件。

有人可以向我展示解决我的问题的正确方法吗?

import scrapy
from scrapy_splash import SplashRequest

class QuotesSpider(scrapy.Spider):
    """Walk the paginated MSC cruise search results through Splash.

    The "next" control on the results page has no href; it only triggers
    JavaScript. Each Splash request starts from a fresh browser state, so a
    follow-up request cannot continue from the page the previous one ended
    on. To reach page N+1 the Lua script therefore reloads the listing and
    clicks the "next" button N times before returning the HTML.
    """

    name = "cruises"
    start_urls = ["https://www.msccruises.com/en-gl/Plan-Book/Find-Cruise.aspx"]

    # Lua script for Splash's ``execute`` endpoint. It reads ``url``,
    # ``wait`` and ``clicks`` from the request args.
    next_page_script = """
    function main(splash)
        assert(splash:go(splash.args.url))
        splash:wait(splash.args.wait)
        for _ = 1, splash.args.clicks do
            splash:runjs("document.getElementsByClassName('btnPageNext')[0].click()")
            splash:wait(splash.args.wait)
        end
        return splash:html()
    end"""

    def start_requests(self):
        """Render every start URL through Splash so JS-built markup exists."""
        for url in self.start_urls:
            yield SplashRequest(url, self.parse, args={'wait': 0.5})

    def parse(self, response):
        """Request the following results page while a "next" button exists.

        The number of clicks already replayed for ``response`` is carried in
        ``response.meta['next_clicks']`` (absent, i.e. 0, for the first page).
        """
        next_page = response.xpath('//a[@class="btnPageNext"]')
        # xpath() returns a (possibly empty) SelectorList, never None, so
        # test its truthiness; otherwise the spider would paginate forever.
        if not next_page:
            return

        # NOTE(review): this assumes the button is absent on the last page.
        # If the site only disables it, add a stricter stop condition.
        clicks = response.meta.get('next_clicks', 0) + 1
        yield SplashRequest(
            self.start_urls[0],
            self.parse,
            endpoint='execute',
            method='POST',
            # Same URL every time; only the click count differs.
            dont_filter=True,
            meta={'next_clicks': clicks},
            args={
                'wait': 1.0,
                'clicks': clicks,
                'lua_source': self.next_page_script,
            },
        )

0 个答案:

没有答案