我想获取网页上的 Google 广告内容。我正在使用 PhantomJS:渲染出的截图中可以看到 Google 广告块,但是获取到的源代码(HTML)里只有 JavaScript 代码,没有广告的实际内容。
下面是我的简单代码:
// Minimal PhantomJS script: load a page, then save its HTML and a screenshot.
var fs = require('fs');
var page = require('webpage').create();
var url = "http://www.thegeekstuff.com/2016/04/oracle-undo-tablespace/";
// Start loading the page; the handler assigned just below is the one that
// reacts when loading finishes.
page.open(url);
page.onLoadFinished = function()
{
// Write the page's HTML to source.htm (file opened with mode 'w').
// NOTE(review): page.content appears to cover the top-level document only, so
// the ad iframe's own markup is not included — confirm against the PhantomJS docs.
fs.write("source.htm", page.content, 'w');
// Save a screenshot of the page to render.png.
page.render('render.png');
// Terminate the PhantomJS process after both outputs have been requested.
phantom.exit();
};
渲染出的图片中包含 Google 广告内容,
但得到的 HTML 源代码如下:
<p>In the above example, we have two UNDO tablespace listed. But only one of them can be active and used by the system. The other one is currently not used.</p>
<p>So, the best way to view the current valid UNDO tablespace is by using “show parameter” as shown below.</p>
<center>
<div style="margin-left:2px; margin-top:10px; margin-bottom:10px; ">
<!-- AD BLOCK -->
<script async="" src="//pagead2.googlesyndication.com/pagead/js/adsbygoogle.js"></script>
<!-- TGS Inside Content -->
<ins class="adsbygoogle" style="display: inline-block; width: 300px; height: 250px;" data-ad-client="ca-pub-8090601437064582" data-ad-slot="8643685131" data-adsbygoogle-status="done"><ins id="aswift_1_expand" style="display:inline-table;border:none;height:250px;margin:0;padding:0;position:relative;visibility:visible;width:300px;background-color:transparent"><ins id="aswift_1_anchor" style="display:block;border:none;height:250px;margin:0;padding:0;position:relative;visibility:visible;width:300px;background-color:transparent"><iframe width="300" height="250" frameborder="0" marginwidth="0" marginheight="0" vspace="0" hspace="0" allowtransparency="true" scrolling="no" allowfullscreen="true" onload="var i=this.id,s=window.google_iframe_oncopy,H=s&&s.handlers,h=H&&H[i],w=this.contentWindow,d;try{d=w.document}catch(e){}if(h&&d&&(!d.body||!d.body.firstChild)){if(h.call){setTimeout(h,0)}else if(h.match){try{h=s.upd(h,i)}catch(e){}w.location.replace(h)}}" id="aswift_1" name="aswift_1" style="left:0;position:absolute;top:0;"></iframe></ins></ins></ins>
<script>
(adsbygoogle = window.adsbygoogle || []).push({});
</script>
<!-- END AD BLOCK -->
答案 0 :(得分:1)
查看页面输出,可以看到有一个名为 'aswift_1' 的 iframe,它内部又包含一个名为 'google_ads_frame2' 的 iframe。我对 Google 广告的结构并不熟悉,但乍一看,广告内容似乎就是通过这些 iframe 输出的。
如果您想使用phantomjs获取此iframe内容,可以使用以下代码:
// PhantomJS script: open the article, descend into the nested Google ad
// iframes, and save the innermost frame's HTML plus a screenshot.
// Written in ES5 on purpose: PhantomJS does not support let / arrow functions.
var fs = require('fs');
var page = require('webpage').create();
var url = "http://www.thegeekstuff.com/2016/04/oracle-undo-tablespace/";
// Frame names to descend through, outermost first.
var framePath = ['aswift_1', 'google_ads_frame2'];

page.open(url, function(status) {
    var exitCode = 0;
    var i;

    if ('success' !== status) {
        console.log("Error");
        exitCode = 1;
    } else {
        for (i = 0; i < framePath.length; i++) {
            // switchToFrame replaces the deprecated switchToChildFrame and
            // reports whether the named frame was found. Stop on the first
            // miss so we never save the wrong frame's HTML as the ad content.
            if (!page.switchToFrame(framePath[i])) {
                console.log("Error: frame not found: " + framePath[i]);
                exitCode = 1;
                break;
            }
        }

        if (exitCode === 0) {
            // frameContent is the HTML of the currently selected frame.
            fs.write("test-google-source.htm", page.frameContent, 'w');
        }

        // Always take the screenshot; it helps diagnose a missing frame.
        page.render('test-google-render.png');
    }

    // Non-zero exit status lets calling scripts detect a failed run.
    phantom.exit(exitCode);
});