我想从下面这段代码片段中解析出文章的链接。我感兴趣的属性是:data-article-link
我需要这个相对URL,才能继续解析后续的页面。
我尝试了很多选择器组合,但不幸的是,没有一种能达到目的。有人能帮我写出正确的代码吗?
我已经试了好几个小时,但没有成功……非常感谢任何帮助。原始链接如下: http://www.hornbach.de/shop/Schrauben/Holzschrauben/S1678/artikelliste.html#/eyJuIjoxLCJ2aWV3IjoiZ2FsbGVyeSIsImxpc3RDcml0ZXJpYSI6eyJwYWdlU2l6ZSI6NzIsInBhZ2VOdW1iZXIiOjEsInNvcnRPcmRlciI6InNvcnRNb2RlRHYifSwiYWN0aXZlRmlsdGVycyI6W119
<!-- Rendered DOM of a single article tile (AngularJS bindings already resolved). The relative article URL is carried by the data-article-link attribute on the root div below; the same path also appears in the href of the inner links. -->
<div ng-class="{lastitem:$last}" ng-style="$index % cols ? {} : {clear: 'both'}" data-article-link="/shop/Abdeckkappe-fuer-Eternitschraube-ohne-Stahleinlage-100-Stueck/3836599/artikel.html" on-finish-repeat="initArticleBoxes" ng-repeat="article in articleData.articles" class="article row-1" style="clear: both;">
<!--div class="article row-{{calculateRow($index)}}" quick-ng-repeat="article in articleData.articles" quick-repeat-list="articles" on-finish-repeat="initArticleBoxes" data-article-link="{{article.localizedExternalArticleLink}}" style="{{$index % cols ? '' : 'clear: both;'}}"-->
<a class="image-container" ng-href="/shop/Abdeckkappe-fuer-Eternitschraube-ohne-Stahleinlage-100-Stueck/3836599/artikel.html" href="/shop/Abdeckkappe-fuer-Eternitschraube-ohne-Stahleinlage-100-Stueck/3836599/artikel.html">
<div ng-show="article.highlight" class="banderole ng-hide">
<span ng-show="article.highlight == 'TOP'" class="top_product ng-hide">Top-Artikel</span>
</div>
<img alt="Abdeckkappe für Eternitschraube ohne Stahleinlage, 100 Stück" title="" image-lazy-src="/data/shop/D04/001/780/491/336/06/DV_157x152_3836599_02_4c_DE_20120704115840.jpg" src="/data/shop/D04/001/780/491/336/06/DV_157x152_3836599_02_4c_DE_20120704115840.jpg" class="article-image">
</a>
<div class="article-info-wrapper">
<div class="compare" style="height: 16px;">
<a ng-show="article.placedOnComparisonList" href="/shop/artikelvergleich.html" class="to-compare-list ng-hide">Zum Artikelvergleich</a>
<label ng-hide="article.placedOnComparisonList" for="compare-3836599" ng-class="{disabled: article.disableComparisonList}" class="label">vergleichen</label>
<input type="checkbox" ng-change="toggleCompareThisArticle($index)" ng-disabled="article.disableComparisonList" ng-model="article.placedOnComparisonList" name="compare-3836599" class="checkbox ng-pristine ng-valid" id="compare-3836599">
</div>
<a ng-href="/shop/Abdeckkappe-fuer-Eternitschraube-ohne-Stahleinlage-100-Stueck/3836599/artikel.html" class="title-link" href="/shop/Abdeckkappe-fuer-Eternitschraube-ohne-Stahleinlage-100-Stueck/3836599/artikel.html">
<span class="title ng-binding" style="height: 60px;">
Abdeckkappe für Eternitschraube ohne Stahleinlage 100 Stück
</span>
</a>
<a ng-href="/shop/Abdeckkappe-fuer-Eternitschraube-ohne-Stahleinlage-100-Stueck/3836599/artikel.html#ratings" class="rating-link" href="/shop/Abdeckkappe-fuer-Eternitschraube-ohne-Stahleinlage-100-Stueck/3836599/artikel.html#ratings">
<span ng-show="article.articleRatingsTotal.ratingsCount" class="rating">
<img width="91" height="18" alt="rating" src="/images/content/global/icn_hammer@2x.png" class="icon r-5"> <span class="rating-number ng-binding">(1)</span>
</span>
<span ng-hide="article.articleRatingsTotal.ratingsCount" class="rating unrated ng-hide">
Noch nicht bewertet
</span>
</a>
</div>
<div class="shopping-info-wrapper">
<a ng-href="/shop/Abdeckkappe-fuer-Eternitschraube-ohne-Stahleinlage-100-Stueck/3836599/artikel.html" class="pricing-wrapper" href="/shop/Abdeckkappe-fuer-Eternitschraube-ohne-Stahleinlage-100-Stueck/3836599/artikel.html">
<span class="pricing">
<span class="uvp ng-hide" ng-hide="article.recommendedRetailPrice == null">UVP <s class="ng-binding"> <span ng-hide="article.recommendedRetailPrice.unit == null" class="ng-binding ng-hide"> / </span></s></span>
<span class="price ng-binding">5,05 €<span class="asterisk ng-binding" ng-hide="article.standardPrice.asterisk == null">*</span> <span class="unit ng-binding">/ Pack</span></span>
<span class="baseprice ng-binding" ng-hide="article.basePrice == null || article.packagePrice != null">(15,78 €* / Kilogramm)</span>
<span class="packageprice ng-binding ng-hide" ng-hide="article.packagePrice == null">(Packpreis <span class="asterisk ng-binding ng-hide" ng-hide="article.packagePrice.asterisk == null"></span>)</span>
</span>
</a>
<div class="clearfix energy-efficiency-block class- listview" style="height: 0px;">
<!-- ngIf: article.energyEfficiencyClass -->
<!-- ngIf: article.energyEfficiencyData -->
</div>
<div ng-show="article.displayStatusDV || article.displayStatusMarket" class="displaystatus" style="height: 111px;">
<ul>
<li><a class="status dv active" ng-href="/shop/Abdeckkappe-fuer-Eternitschraube-ohne-Stahleinlage-100-Stueck/3836599/artikel.html" href="/shop/Abdeckkappe-fuer-Eternitschraube-ohne-Stahleinlage-100-Stueck/3836599/artikel.html" style="height: 12px;">Online bestellbar</a></li>
<li><a class="status ra active" ng-href="/shop/Abdeckkappe-fuer-Eternitschraube-ohne-Stahleinlage-100-Stueck/3836599/artikel.html" href="/shop/Abdeckkappe-fuer-Eternitschraube-ohne-Stahleinlage-100-Stueck/3836599/artikel.html" style="height: 12px;">reservierbar</a></li>
<li ng-show="article.multipleVariantsText" class="variant ng-hide"><a class="status ng-binding" ng-href="/shop/Abdeckkappe-fuer-Eternitschraube-ohne-Stahleinlage-100-Stueck/3836599/artikel.html" href="/shop/Abdeckkappe-fuer-Eternitschraube-ohne-Stahleinlage-100-Stueck/3836599/artikel.html"></a></li>
</ul>
</div>
</div>
</div>
答案 0 :(得分:0)
属性 data-article-link 的内容是在运行时通过 JavaScript / AngularJS 填充的。因此,单纯使用 JSoup 的方法行不通。
但是,您可以使用 Selenium 和 WebDriver 从真实的浏览器中获取渲染后的页面。或者,您也可以尝试找出页面对网站 API 发出的 AJAX 调用,并直接从其响应中获取链接。