CasperJS 无法处理亚马逊的 iframe

时间:2016-12-30 07:12:51

标签: iframe phantomjs casperjs

我正在运行一个小项目,需要一些亚马逊图书的预览内容(它们可以是PNG图像或HTML内容)。

例如,本书:https://www.amazon.com/gp/product/B00JNYEXCK/

点击"查看内部"(Look Inside)徽章(id="sitbLogoImg" 的 img 标签)后,会出现一个新的框架,显示该书的预览内容。预览有两个版本:印刷版预览(PNG 图像,我已经可以获取)和 Kindle 预览(一个 iframe 文档)。

我卡在了 Kindle 预览的 iframe 上,它的结构大致如下:



<!-- Simplified sketch of the Kindle preview markup: the sample content is a
     separate document nested inside the iframe with id "sitbReaderFrame"
     (the iframe has an id but no name attribute). -->
<div id="scrollElm-0" class="pageHtml">
  <div id="sitbReaderKindleSample">
    <iframe id="sitbReaderFrame">
      <html>
      <head></head>
      <body>
        <p>.......</p>
        <div>......</div>
        ....
      </body>
      </html>
    </iframe>
  </div>
</div>

这是我的 CasperJS 脚本:

// Opens an Amazon book page, clicks the "Look Inside" badge, then tries to
// save the HTML of the Kindle preview frame to lis_content.html, taking
// screenshots along the way. Cookies are loaded from and written back to
// cookies.txt so they persist between runs.
var fs = require('fs');
var casper = require('casper').create({
  pageSettings: {
    loadPlugins: false,
    // Identify as a desktop Chrome browser instead of the default agent.
    userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36'
  }
});
casper.options.viewportSize = {
  width: 1366,
  height: 768
};
// Timeout used by the waitFor* steps below (presumably milliseconds).
casper.options.waitTimeout = 10000;

// Restore cookies saved by a previous run.
// NOTE(review): presumably fs.read fails when cookies.txt does not exist yet
// (e.g. on the very first run) — confirm and guard if needed.
var cookieFilename = "cookies.txt";
var data = fs.read(cookieFilename);
if (data) {
  phantom.cookies = JSON.parse(data);
}

// Step 1: load the product page, print the status and take a screenshot.
casper.start('https://www.amazon.com/gp/product/B00JNYEXCK/', function() {
  this.echo(this.status(true));
  this.captureSelector('before.png', 'html');
});
// Step 2: wait until the "Look Inside" badge image is present.
casper.waitForSelector('img#sitbLogoImg', function() {
  //this.captureSelector('before.png','html');
});
// Step 3: click the badge to open the preview.
casper.then(function() {
  this.click('img#sitbLogoImg');
});
// Step 4: wait for the element with id "sitbLBHeader" to appear.
casper.waitForSelector('div#sitbLBHeader', function() {
  // Nothing to do here; the step only acts as a synchronisation point.
});
// Holds the frame HTML between the withFrame step and the file-writing step.
var lis_content = '';
// Step 5: wait a further 3000 (fixed delay), then screenshot the whole page.
casper.wait(3000, function() {
  this.captureSelector('after.png', 'html');
});
// Step 6: switch into the child frame at index 1 and grab its HTML.
// NOTE(review): the index is hard-coded; the frame being targeted has only an
// id ("sitbReaderFrame") and no name, so it cannot be addressed by name here.
casper.withFrame(1, function() {
  lis_content = this.getHTML();
  this.captureSelector('lis_content.png', 'html');
});

// Step 7: write the captured frame HTML (sitbReaderFrame) to a file.
// NOTE(review): 644 is passed where fs.write takes its mode argument; it looks
// like a Unix permission value — confirm what the fs module does with it.
casper.then(function() {
  var lis_content_filename = 'lis_content.html';
  fs.write(lis_content_filename, lis_content, 644);
});

// Step 8: after a short delay, persist the current cookies for the next run.
casper.wait(1000, function() {
  var cookies = JSON.stringify(phantom.cookies);
  fs.write(cookieFilename, cookies, 644);
});
// Execute the queued steps.
casper.run();

问题是该 iframe 只有 id="sitbReaderFrame" 而没有 name。我已经尝试过 casper.withFrame,帧索引从 0 试到 4,但它似乎都没有出现在 CasperJS 的视图中。

我想听听你的任何建议,因为我真的被困在这里。非常感谢,抱歉我的英语不好。

1 个答案:

答案 0 :(得分:1)

CasperJS 脚本:

/**
 * Prepares a page object so that it looks like a desktop Chrome browser.
 *
 * Sets the page's viewport size and then calls page.evaluate with a callback
 * that overwrites `screen`, the inner/outer window dimensions and
 * `window.navigator` with fixed values (Chrome 56 on Linux, with a
 * Shockwave Flash plugin entry).
 *
 * @param {Object} page - page object exposing `viewportSize` and `evaluate`.
 */
function on_init(page) {
  // Dimensions are kept as strings, exactly as they are forwarded to the page.
  var viewWidth = '1600';
  var viewHeight = '900';

  page.viewportSize = { width: viewWidth, height: viewHeight };

  // The callback is handed to page.evaluate, so it must be self-contained:
  // it only uses its own arguments and the globals it assigns to.
  page.evaluate(function (w, h) {
    screen = { width: w, height: h, availWidth: w, availHeight: h };
    innerWidth = w;
    innerHeight = h;
    outerWidth = w;
    outerHeight = h;

    window.navigator = {
      plugins: {
        length: 2,
        'Shockwave Flash': {
          name: 'Shockwave Flash',
          filename: '/usr/lib/flashplugin-nonfree/libflashplayer.so',
          description: 'Shockwave Flash 11.2 r202',
          version: '11.2.202.440'
        }
      },
      mimeTypes: {
        length: 2,
        'application/x-shockwave-flash': {
          description: 'Shockwave Flash',
          suffixes: 'swf',
          type: 'application/x-shockwave-flash',
          enabledPlugin: {
            name: 'Shockwave Flash',
            filename: '/usr/lib/flashplugin-nonfree/libflashplayer.so',
            description: 'Shockwave Flash 11.2 r202',
            version: '11.2.202.440'
          }
        }
      },
      appCodeName: 'Mozilla',
      appName: 'Netscape',
      appVersion: '5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36',
      cookieEnabled: 1,
      languages: 'en-US,en',
      language: 'en',
      onLine: 1,
      doNotTrack: null,
      platform: 'Linux x86_64',
      product: 'Gecko',
      vendor: 'Google Inc.',
      vendorSub: '',
      productSub: 20030107,
      userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36',
      geolocation: {
        getCurrentPosition: function getCurrentPosition() {},
        watchPosition: function watchPosition() {},
        clearWatch: function clearWatch() {}
      },
      javaEnabled: function javaEnabled() {
        return 0;
      }
    };
  }, viewWidth, viewHeight);
}

// Creates the Casper instance with debug logging enabled.
// NOTE(review): userAgent is passed as a top-level option here; CasperJS
// documents the user agent under pageSettings — confirm this key has any
// effect before relying on it. It is left untouched because changing the
// agent actually sent could change which page the site serves.
var casper = require('casper').create({
    verbose: true,
    logLevel: 'debug',
    waitTimeout: 5000,
    userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36'
}), fs = require('fs');

casper
    // Surface script errors, in-page errors and in-page console output.
    .on("error", function (msg) { this.echo("error: " + msg, "ERROR"); })
    .on("page.error", function (msg, trace) { this.echo("Page Error: " + msg, "ERROR"); })
    .on("remote.message", function (msg) { this.echo("Info: " + msg, "INFO"); })
    // Apply the browser spoofing each time a page is initialised.
    .on('page.initialized', on_init)

    .start("https://www.amazon.com/gp/product/B00JNYEXCK/", function () {
        // Open the preview by clicking the "Look Inside" badge.
        this.click('#ebooksSitbLogoImg');
        this
            .capture('lis.png')
            .wait(3000, function () {
                // Locate the preview iframe by id among all iframes of the
                // document; the value handed to withFrame is its position + 1.
                var index = this.evaluate(function () {
                    var i, x = document.querySelectorAll('iframe'), r;
                    for (i = 0; i < x.length; i++) {
                        if (x[i].id == "sitbReaderFrame") {
                            r = i + 1;
                        }
                    }
                    return r;
                });

                // Without this guard a missing iframe would be passed to
                // withFrame as a null/undefined frame identifier.
                if (index === null || index === undefined) {
                    this
                        .echo("error: iframe #sitbReaderFrame was not found", "ERROR")
                        .capture('lis_content.png');
                    return;
                }

                this
                    .echo("The index is: " + index, "INFO")
                    .capture('lis_content.png')
                    .withFrame(index, function () {
                        // 'w' is the write mode; the previous value 644 was a
                        // permission-like number, not a mode string.
                        fs.write('lis_content.html', this.getHTML(), 'w');
                    });
            });
    })
    .run();

您需要使用 --cookies-file 选项,以避免被屏蔽。

运行命令:

./casperjs --cookies-file=./cookies_1.txt casis.js >/dev/stdout

如果输出如下内容:

error: CasperError: Cannot dispatch mousedown event on nonexistent selector: #ebooksSitbLogoImg

说明仍然没能避免被屏蔽。

在这种情况下,请重新连接互联网并获取新的 IP 地址后再试一次。