JavaScript 渲染是只能在首页上进行,还是在后续跟进的页面上也可以进行?下面这个脚本在我这里无法正常工作:我怀疑爬虫在后续页面上并没有等待 3 秒,并且经过 Splash 之后返回的不是 HTML。
class JsSpider(CrawlSpider):
    """Crawl a JavaScript-driven site, sending every page through Splash.

    The start URLs are requested with ``SplashRequest``. Links extracted by
    ``rules`` are tagged with the same Splash settings in ``use_splash`` so
    that followed pages are rendered (and waited on) like the first page.
    """

    name = 'js_spider'
    start_urls = ["http://www.jspage.net"]  # First level

    # Splash settings shared by the start requests and the followed links.
    splash_endpoint = 'render.html'
    splash_args = {'wait': 3}

    # 1. Following
    # - restrict_xpaths must select elements/regions, not attribute nodes,
    #   hence '//div/a' rather than '//div/a/@href'.
    # - The callback must not be named 'parse': the Scrapy docs warn that
    #   CrawlSpider relies on that method to apply the rules.
    # - process_request routes each extracted link through Splash.
    # NOTE(review): both rules use the same extractor, so the second one
    # appears to be shadowed by the first; give Level 2 its own XPath if it
    # is meant to match different links.
    rules = (
        Rule(LinkExtractor(restrict_xpaths='//div/a'),
             callback='parse_item',
             follow=True,
             process_request='use_splash'),  # Level 1
        Rule(LinkExtractor(restrict_xpaths='//div/a'),
             callback='parse_item',
             process_request='use_splash'),  # Level 2
    )

    def start_requests(self):
        """Yield one Splash-rendered request per start URL.

        No explicit callback is set, so the response is handled by the
        spider's default callback and the rules are applied to it.
        """
        for url in self.start_urls:
            yield SplashRequest(url=url,
                                endpoint=self.splash_endpoint,
                                # Copy so requests never share one args dict.
                                args=dict(self.splash_args))

    def use_splash(self, request, response=None):
        """Mark a rule-generated request for rendering by Splash.

        ``response`` is optional so the hook can be called with either
        ``(request)`` or ``(request, response)``.

        NOTE(review): CrawlSpider may skip link extraction for responses
        that are not ``HtmlResponse``; if followed pages still yield no
        links, check the response class returned by scrapy-splash against
        the installed Scrapy version.
        """
        request.meta['splash'] = {
            'endpoint': self.splash_endpoint,
            'args': dict(self.splash_args),
        }
        return request

    def parse_item(self, response):
        """Handle a rendered page reached through one of the rules."""
        pass