我正在使用 scrapy-splash,尝试从分页块中获取下一页的 href 链接。我一直没能找到解决方案,非常感谢任何帮助。从首页提取数据是按预期工作的。
为了获取下一页的链接,我尝试了以下代码:
next_page = response.xpath("//ul[@class='pagination center ng-scope']/li[8]/a/@href").get()
if next_page:
yield scrapy.Request(url=next_page, callback=self.parse)
如以下 HTML 所示,分页链接的 href 值为空(href=""):
<nav class="paging ng-isolate-scope" data-jn-result-pager="">
<!-- ngIf: totalPages > 1 --><ul class="pagination center ng-scope" data-ng-if="totalPages > 1" style="">
<li>
<a href="" data-jn-click="firstPage()" data-ng-class="{'disabled-element':currentPage === 1}" tabindex="0" class="" style="">
<span class="hidden-md hidden-sm hidden-xs">Første</span>
<span class="hidden-lg icon icon-arrow-end-left"></span>
</a>
</li>
<li>
<a href="" data-jn-click="prevPage()" data-ng-class="{'disabled-element':currentPage === 1}" tabindex="0" class="" style="">
<span class="hidden-md hidden-sm hidden-xs"><span class="icon icon-arrow-left"></span> Forrige</span>
<span class="hidden-lg icon icon-double-arrow-left"></span>
</a>
</li>
<!-- ngRepeat: index in pagerList --><li data-ng-repeat="index in pagerList" data-ng-class="{'active': index === currentPage}" class="ng-scope" style="">
<a href="" data-jn-click="goToPage(index)" class="ng-binding" tabindex="0">767</a>
</li><!-- end ngRepeat: index in pagerList --><li data-ng-repeat="index in pagerList" data-ng-class="{'active': index === currentPage}" class="ng-scope">
<a href="" data-jn-click="goToPage(index)" class="ng-binding" tabindex="0">768</a>
</li><!-- end ngRepeat: index in pagerList --><li data-ng-repeat="index in pagerList" data-ng-class="{'active': index === currentPage}" class="ng-scope">
<a href="" data-jn-click="goToPage(index)" class="ng-binding" tabindex="0">769</a>
</li><!-- end ngRepeat: index in pagerList --><li data-ng-repeat="index in pagerList" data-ng-class="{'active': index === currentPage}" class="ng-scope active" style="">
<a href="" data-jn-click="goToPage(index)" class="ng-binding" tabindex="0">770</a>
</li><!-- end ngRepeat: index in pagerList --><li data-ng-repeat="index in pagerList" data-ng-class="{'active': index === currentPage}" class="ng-scope" style="">
<a href="" data-jn-click="goToPage(index)" class="ng-binding" tabindex="0">771</a>
</li><!-- end ngRepeat: index in pagerList -->
<li>
<a href="" data-jn-click="nextPage()" data-ng-class="{'disabled-element':currentPage === totalPages}" tabindex="0" class="" style="">
<span class="hidden-md hidden-sm hidden-xs">Næste <span class="icon icon-arrow-right"></span></span>
<span class="hidden-lg icon icon-double-arrow-right"></span>
</a>
</li>
<li>
<a href="" data-jn-click="lastPage()" data-ng-class="{'disabled-element':currentPage === totalPages}" tabindex="0" class="" style="">
<span class="hidden-md hidden-sm hidden-xs">Sidste</span>
<span class="hidden-lg icon icon-arrow-end-right"></span>
</a>
</li>
</ul><!-- end ngIf: totalPages > 1 -->
强文本