LinkOxtractor如何使用SplashRequest发送请求
我运行此代码但仅向主页发送请求,不提取主页的链接
代码:
rules = { Rule(
LinkExtractor(
restrict_xpaths=('//ul[contains(@class, "nav-list")]'),
),
callback="parse_item",
process_request='use_splash'
)
}
def use_splash(self, request):
return SplashRequest(url=request.url,
endpoint="render.json",
args={
"wait":1,
"har": 1,
"html": 1,
})
def start_requests(self):
yield SplashRequest("http://www.example.com",
callback=self.parse_item,
endpoint="render.json",
args={
"wait":1,
"har": 1,
"html": 1,
})
def parse_item(self):
# Extract item
修改
有评论吗?