我有下面这段 JavaScript 代码,用来从页面中提取内容:
// Builds an object keyed by each result's title text; every entry stores the
// title (`name`), the snippet text (`age`), the link (`url`) and the thumbnail
// source (`img`) of one matched result element, then logs the object.
// NOTE(review): '.gs-webResult gs-result' is a descendant selector whose second
// part is the type selector `gs-result` (no leading dot), i.e. it looks for
// <gs-result> elements inside .gs-webResult. The sample markup only has a
// <div> carrying both classes, which '.gs-webResult.gs-result' would match.
const res = [].slice.call(document.querySelectorAll('.gs-webResult gs-result')).reduce(function(res, parent, index) {
// Text of the first element with class "gs-title" inside this result.
const name = parent.querySelector('.gs-title').textContent;
// Text of the first element with class "gs-snippet" (stored under the key `age`).
const age = parent.querySelector('.gs-snippet').textContent;
// NOTE(review): no element in the sample markup has class "title", so
// querySelector returns null here and .getAttribute would throw a TypeError.
const url = parent.querySelector('.title').getAttribute('href');
// NOTE(review): in the sample markup both the <a> and the nested <img> carry
// class "gs-image"; the <a> comes first in document order and has no `src`
// attribute, so getAttribute('src') yields null.
const img = parent.querySelector('.gs-image').getAttribute('src');
// Results sharing the same title text overwrite each other.
res[name] = { name, age, url, img };
return res;
}, {});
console.log(res)
HTML:
<div class="gs-webResult gs-result">
<div class="gsc-thumbnail-inside">
<div class="gs-title"><a class="gs-title" href="https://www.google.com/url?q=https://en-gb.facebook.com/AMIDEASTYemen/posts&sa=U&ved=0ahUKEwj_uKH9l5HcAhVFXn0KHS0dDAYQFggEMAA&client=internal-uds-cse&cx=017093085003369662669:vlgrcqgvtxk&usg=AOvVaw2AJoa0Ih-fNpOLvlOxmdNS" target="_blank" dir="ltr" data-cturl="https://www.google.com/url?q=https://en-gb.facebook.com/AMIDEASTYemen/posts&sa=U&ved=0ahUKEwj_uKH9l5HcAhVFXn0KHS0dDAYQFggEMAA&client=internal-uds-cse&cx=017093085003369662669:vlgrcqgvtxk&usg=AOvVaw2AJoa0Ih-fNpOLvlOxmdNS" data-ctorig="https://en-gb.facebook.com/AMIDEASTYemen/posts">AMIDEAST <b>Yemen</b> - Posts | Facebook</a></div>
</div>
<div class="gsc-url-top">
<div class="gs-bidi-start-align gs-visibleUrl gs-visibleUrl-short" dir="ltr">en-gb.facebook.com</div>
<div class="gs-bidi-start-align gs-visibleUrl gs-visibleUrl-long" dir="ltr" style="word-break:break-all;">https://en-gb.facebook.com/AMIDEAST<b>Yemen</b>/<b>posts</b></div>
</div>
<table class="gsc-table-result">
<tbody>
<tr>
<td class="gsc-table-cell-thumbnail gsc-thumbnail" style="">
<div class="gs-image-box gs-web-image-box gs-web-image-box-portrait">
<a class="gs-image" href="https://en-gb.facebook.com/AMIDEASTYemen/posts" target="_blank" data-cturl="https://www.google.com/url?q=https://en-gb.facebook.com/AMIDEASTYemen/posts&sa=U&ved=0ahUKEwj_uKH9l5HcAhVFXn0KHS0dDAYQFggEMAA&client=internal-uds-cse&cx=017093085003369662669:vlgrcqgvtxk&usg=AOvVaw2AJoa0Ih-fNpOLvlOxmdNS" data-ctorig="https://en-gb.facebook.com/AMIDEASTYemen/posts"><img class="gs-image" onload="if (this.parentNode && this.parentNode.parentNode && this.parentNode.parentNode.parentNode) { this.parentNode.parentNode.parentNode.style.display = ''; this.parentNode.parentNode.className = 'gs-image-box gs-web-image-box gs-web-image-box-portrait'; } " src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTrsTbHMW2YtzbtH4kGtfVETKB2cVeEXB74D4yVwDKCP2mI53yzQ4Vc_FFB"></a>
</div>
</td>
<td class="gsc-table-cell-snippet-close">
<div class="gs-title gsc-table-cell-thumbnail gsc-thumbnail-left"><a class="gs-title" href="https://en-gb.facebook.com/AMIDEASTYemen/posts" target="_blank" dir="ltr" data-cturl="https://www.google.com/url?q=https://en-gb.facebook.com/AMIDEASTYemen/posts&sa=U&ved=0ahUKEwj_uKH9l5HcAhVFXn0KHS0dDAYQFggEMAA&client=internal-uds-cse&cx=017093085003369662669:vlgrcqgvtxk&usg=AOvVaw2AJoa0Ih-fNpOLvlOxmdNS" data-ctorig="https://en-gb.facebook.com/AMIDEASTYemen/posts">AMIDEAST <b>Yemen</b> - Posts | Facebook</a></div>
<div><span></span></div>
<div class="gs-bidi-start-align gs-snippet" dir="ltr">“AMIDEAST is offering <b>Yemeni</b> youth a unique opportunity by working in a professional environment and equips them with the necessary skills and tools to begin ...
</div>
<div class="gsc-url-bottom">
<div class="gs-bidi-start-align gs-visibleUrl gs-visibleUrl-short" dir="ltr">en-gb.facebook.com</div>
<div class="gs-bidi-start-align gs-visibleUrl gs-visibleUrl-long" dir="ltr" style="word-break:break-all;">https://en-gb.facebook.com/AMIDEAST<b>Yemen</b>/<b>posts</b></div>
</div>
<div class="gs-richsnippet-box" style="display: block;"><span class="gsc-richsnippet-showsnippet-label" tabindex="0">Structured data</span></div>
<div class="gs-per-result-labels" url="https://en-gb.facebook.com/AMIDEASTYemen/posts"><span>Labeled</span><a class="gs-label" data-refinementlabel="facebook" label-with-op="more:facebook" dir="ltr" tabindex="0">Facebook</a><span></span></div>
</td>
</tr>
</tbody>
</table>
</div>
获取 `.gs-title` 和 `.gs-snippet` 的内容时,代码运行良好。
但是,当我尝试使用 `.getAttribute` 获取图像的 `src` 时,得到的输出值却是 `null`。
有什么主意吗?
答案 0 :(得分:0)
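存在三个主要问题:
1. 匹配外层 div 的选择器 '.gs-webResult gs-result' 写错了,应为 '.gs-webResult.gs-result';
2. 标记中没有使用类名 'title',因此 '.title' 找不到链接 a;
3. a 和 img 都带有类名 'gs-image',所以需要用 'img.gs-image' 来选取 img。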
// Collects the search results once the page has loaded, so the result markup
// is already present in the DOM when it is queried.
// NOTE: assigning window.onload replaces any handler previously stored there.
window.onload = function () {
  // Text content of the first element matching `selector` inside `scope`,
  // or '' when nothing matches.
  function textOf(scope, selector) {
    const node = scope.querySelector(selector);
    return node ? node.textContent : '';
  }

  // Value of `attribute` on the first element matching `selector` inside
  // `scope`, or '' when either the element or the attribute is missing.
  function attrOf(scope, selector, attribute) {
    const node = scope.querySelector(selector);
    const value = node ? node.getAttribute(attribute) : null;
    return value === null ? '' : value;
  }

  // '.gs-webResult.gs-result' (no space) matches the one element carrying both
  // classes; with a space it would look for a <gs-result> descendant instead.
  const res = [].slice.call(document.querySelectorAll('.gs-webResult.gs-result')).reduce(function (acc, parent) {
    const name = textOf(parent, '.gs-title');
    const age = textOf(parent, '.gs-snippet');
    // The markup has no class "title"; the result link is the <a class="gs-title">.
    const url = attrOf(parent, 'a.gs-title', 'href');
    // Both the <a> and the <img> carry class "gs-image"; only the <img> has a src.
    const img = attrOf(parent, 'img.gs-image', 'src');
    // Shorthand properties need an ES2015-capable browser.
    // Results that share the same title text overwrite each other.
    acc[name] = { name, age, url, img };
    return acc;
  }, {});

  console.log(res);
};
<div class="gs-webResult gs-result">
<div class="gsc-thumbnail-inside">
<div class="gs-title"><a class="gs-title" href="https://www.google.com/url?q=https://en-gb.facebook.com/AMIDEASTYemen/posts&sa=U&ved=0ahUKEwj_uKH9l5HcAhVFXn0KHS0dDAYQFggEMAA&client=internal-uds-cse&cx=017093085003369662669:vlgrcqgvtxk&usg=AOvVaw2AJoa0Ih-fNpOLvlOxmdNS" target="_blank" dir="ltr" data-cturl="https://www.google.com/url?q=https://en-gb.facebook.com/AMIDEASTYemen/posts&sa=U&ved=0ahUKEwj_uKH9l5HcAhVFXn0KHS0dDAYQFggEMAA&client=internal-uds-cse&cx=017093085003369662669:vlgrcqgvtxk&usg=AOvVaw2AJoa0Ih-fNpOLvlOxmdNS" data-ctorig="https://en-gb.facebook.com/AMIDEASTYemen/posts">AMIDEAST <b>Yemen</b> - Posts | Facebook</a></div>
</div>
<div class="gsc-url-top">
<div class="gs-bidi-start-align gs-visibleUrl gs-visibleUrl-short" dir="ltr">en-gb.facebook.com</div>
<div class="gs-bidi-start-align gs-visibleUrl gs-visibleUrl-long" dir="ltr" style="word-break:break-all;">https://en-gb.facebook.com/AMIDEAST<b>Yemen</b>/<b>posts</b></div>
</div>
<table class="gsc-table-result">
<tbody>
<tr>
<td class="gsc-table-cell-thumbnail gsc-thumbnail" style="">
<div class="gs-image-box gs-web-image-box gs-web-image-box-portrait">
<a class="gs-image" href="https://en-gb.facebook.com/AMIDEASTYemen/posts" target="_blank" data-cturl="https://www.google.com/url?q=https://en-gb.facebook.com/AMIDEASTYemen/posts&sa=U&ved=0ahUKEwj_uKH9l5HcAhVFXn0KHS0dDAYQFggEMAA&client=internal-uds-cse&cx=017093085003369662669:vlgrcqgvtxk&usg=AOvVaw2AJoa0Ih-fNpOLvlOxmdNS" data-ctorig="https://en-gb.facebook.com/AMIDEASTYemen/posts"><img class="gs-image" onload="if (this.parentNode && this.parentNode.parentNode && this.parentNode.parentNode.parentNode) { this.parentNode.parentNode.parentNode.style.display = ''; this.parentNode.parentNode.className = 'gs-image-box gs-web-image-box gs-web-image-box-portrait'; } " src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTrsTbHMW2YtzbtH4kGtfVETKB2cVeEXB74D4yVwDKCP2mI53yzQ4Vc_FFB"></a>
</div>
</td>
<td class="gsc-table-cell-snippet-close">
<div class="gs-title gsc-table-cell-thumbnail gsc-thumbnail-left"><a class="gs-title" href="https://en-gb.facebook.com/AMIDEASTYemen/posts" target="_blank" dir="ltr" data-cturl="https://www.google.com/url?q=https://en-gb.facebook.com/AMIDEASTYemen/posts&sa=U&ved=0ahUKEwj_uKH9l5HcAhVFXn0KHS0dDAYQFggEMAA&client=internal-uds-cse&cx=017093085003369662669:vlgrcqgvtxk&usg=AOvVaw2AJoa0Ih-fNpOLvlOxmdNS" data-ctorig="https://en-gb.facebook.com/AMIDEASTYemen/posts">AMIDEAST <b>Yemen</b> - Posts | Facebook</a></div>
<div><span></span></div>
<div class="gs-bidi-start-align gs-snippet" dir="ltr">“AMIDEAST is offering <b>Yemeni</b> youth a unique opportunity by working in a professional environment and equips them with the necessary skills and tools to begin ...
</div>
<div class="gsc-url-bottom">
<div class="gs-bidi-start-align gs-visibleUrl gs-visibleUrl-short" dir="ltr">en-gb.facebook.com</div>
<div class="gs-bidi-start-align gs-visibleUrl gs-visibleUrl-long" dir="ltr" style="word-break:break-all;">https://en-gb.facebook.com/AMIDEAST<b>Yemen</b>/<b>posts</b></div>
</div>
<div class="gs-richsnippet-box" style="display: block;"><span class="gsc-richsnippet-showsnippet-label" tabindex="0">Structured data</span></div>
<div class="gs-per-result-labels" url="https://en-gb.facebook.com/AMIDEASTYemen/posts"><span>Labeled</span><a class="gs-label" data-refinementlabel="facebook" label-with-op="more:facebook" dir="ltr" tabindex="0">Facebook</a><span></span></div>
</td>
</tr>
</tbody>
</table>
</div>
答案 1 :(得分:-3)
我已在下面提供了完整的代码。
下面是JS代码。
// Placeholder image used for results that have no thumbnail.
const defaultURL = "https://cdn.dribbble.com/users/904380/screenshots/2233565/attachments/415915/revised-google-logo.png";

// Walk every `.gs-webResult` element and build a lookup keyed by title text.
// Each entry records the title, the snippet text, the title link's href and
// the thumbnail source (or the placeholder when no thumbnail exists).
const res = Array.prototype.slice.call(
  document.querySelectorAll('.gs-webResult')
).reduce((collected, resultNode) => {
  const name = resultNode.querySelector('.gs-title').textContent;
  const age = resultNode.querySelector('.gs-snippet').textContent;
  const url = resultNode.querySelector('a.gs-title').getAttribute('href');

  // The thumbnail is the element with class "gs-image" nested inside the
  // <a class="gs-image"> wrapper.
  const thumbnail = resultNode.querySelector('a.gs-image .gs-image');
  let img;
  if (thumbnail) {
    img = thumbnail.getAttribute('src');
  } else {
    img = defaultURL;
  }

  collected[name] = { name: name, age: age, url: url, img: img };
  return collected;
}, {});

console.log(res);
输出: