我正在运行一个小项目,需要一些亚马逊图书的预览内容(它们可以是PNG图像或HTML内容)。
例如,本书:https://www.amazon.com/gp/product/B00JNYEXCK/。
点击"查看内部"(Look Inside)徽章(id="sitbLogoImg" 的 img 标签)后,会出现一个新的框架,显示这本书的预览内容。预览有两个版本:印刷版预览(这些是PNG图像,我已经可以获取)和Kindle预览(这是一个iframe文档)。
我卡在了Kindle预览的iframe上,它的结构基本上是这样的:
<!-- Simplified sketch of the preview markup: the preview document sits inside
     an iframe that carries only an id ("sitbReaderFrame") and no name attribute.
     The dotted lines stand for elided content. -->
<div id="scrollElm-0" class="pageHtml">
<div id="sitbReaderKindleSample">
<iframe id="sitbReaderFrame">
<html>
<head></head>
<body>
<p>.......</p>
<div>......</div>
....
</body>
</html>
</iframe>
</div>
</div>
&#13;
这是我的CasperJS脚本:
// CasperJS script: opens the Amazon product page, clicks the "Look Inside"
// badge, then tries to dump the preview iframe to lis_content.html.
// Runs under CasperJS/PhantomJS, so the code stays ES5.
var fs = require('fs');
var casper = require('casper').create({
    pageSettings: {
        loadPlugins: false,
        userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36'
    }
});
casper.options.viewportSize = {
    width: 1366,
    height: 768
};
casper.options.waitTimeout = 10000;

// Reuse cookies saved by a previous run, if any.
// fs.read throws when the file is missing, so check for it first; otherwise
// the very first run (before cookies.txt has been written) would abort here.
var cookieFilename = "cookies.txt";
var data = fs.exists(cookieFilename) ? fs.read(cookieFilename) : '';
if (data) {
    phantom.cookies = JSON.parse(data);
}

casper.start('https://www.amazon.com/gp/product/B00JNYEXCK/', function() {
    this.echo(this.status(true));
    this.captureSelector('before.png', 'html');
});

// Wait until the "Look Inside" badge is present before clicking it.
casper.waitForSelector('img#sitbLogoImg', function() {
});
casper.then(function() {
    this.click('img#sitbLogoImg');
});

// The preview lightbox header signals that the overlay has opened.
casper.waitForSelector('div#sitbLBHeader', function() {
});

var lis_content = '';
// Give the preview a few seconds to render, then take a screenshot.
casper.wait(3000, function() {
    this.captureSelector('after.png', 'html');
});

// NOTE(review): the frame index is hard-coded. The target iframe has only an
// id and no name, so it is addressed by position here; index 1 may not be the
// preview frame.
casper.withFrame(1, function() {
    lis_content = this.getHTML();
    this.captureSelector('lis_content.png', 'html');
});

// Write the sitbReaderFrame to file.
// The third argument of fs.write is the open mode ('w' = overwrite), not a
// Unix permission number.
casper.then(function() {
    var lis_content_filename = 'lis_content.html';
    fs.write(lis_content_filename, lis_content, 'w');
});

// Persist the cookies for the next run.
casper.wait(1000, function() {
    var cookies = JSON.stringify(phantom.cookies);
    fs.write(cookieFilename, cookies, 'w');
});

casper.run();
&#13;
问题是这个iframe只有 id="sitbReaderFrame",没有name属性。我已经尝试了 casper.withFrame,帧索引从0到4都试过,但它似乎没有出现在CasperJS的视图中。
我想听听你的任何建议,因为我真的被困在这里。非常感谢,抱歉我的英语不好。
答案 0 :(得分:1)
CasperJS 脚本:
/**
 * 'page.initialized' handler that makes the page look like a desktop Chrome
 * browser: sets the viewport on the page object, then overrides the screen
 * and window dimensions and window.navigator inside the page.
 *
 * Spoofed values use the same types a real browser exposes (numeric
 * dimensions, boolean flags, an array for `languages`, a string for
 * `productSub`).
 *
 * The function passed to page.evaluate receives width/height as arguments
 * rather than closing over them, and is kept in ES5.
 *
 * @param {Object} page - object exposing a `viewportSize` property and an
 *                        `evaluate(fn, ...args)` method.
 */
function on_init (page){
    var width = 1600, height = 900;
    page.viewportSize = {width: width, height: height};
    page.evaluate(function (width, height){
        // Bare assignments on purpose: they target the page's global properties.
        screen = {width: width, height: height, availWidth: width, availHeight: height};
        innerWidth = width; innerHeight = height; outerWidth = width; outerHeight = height;
        window.navigator = {
            plugins: {length: 2, 'Shockwave Flash': {name: 'Shockwave Flash', filename: '/usr/lib/flashplugin-nonfree/libflashplayer.so', description: 'Shockwave Flash 11.2 r202', version: '11.2.202.440'}},
            mimeTypes: {length: 2, "application/x-shockwave-flash": {description: "Shockwave Flash", suffixes: "swf", type: "application/x-shockwave-flash", enabledPlugin: {name: 'Shockwave Flash', filename: '/usr/lib/flashplugin-nonfree/libflashplayer.so', description: 'Shockwave Flash 11.2 r202', version: '11.2.202.440'}}},
            appCodeName: "Mozilla",
            appName: "Netscape",
            appVersion: "5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36",
            cookieEnabled: true,
            languages: ["en-US", "en"],
            language: "en",
            onLine: true,
            doNotTrack: null,
            platform: "Linux x86_64",
            product: "Gecko",
            vendor: "Google Inc.",
            vendorSub: "",
            productSub: "20030107",
            userAgent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36",
            geolocation: {getCurrentPosition: function getCurrentPosition(){}, watchPosition: function watchPosition(){}, clearWatch: function clearWatch(){}},
            javaEnabled: function javaEnabled(){ return false; }
        };
    }, width, height);
}
// CasperJS script: opens the product page, clicks the e-book "Look Inside"
// badge, finds the preview iframe by its id and writes its HTML to
// lis_content.html. Runs under CasperJS/PhantomJS, so the code stays ES5.
var casper = require('casper').create({
    verbose: true,
    logLevel: 'debug',
    waitTimeout: 5000,
    // The user agent goes under pageSettings; a top-level `userAgent` key is
    // not a Casper option, so the intended UA would not be applied.
    pageSettings: {
        userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.21 Safari/537.36'
    }
});
var fs = require('fs');

casper
    .on("error", function (msg) { this.echo("error: " + msg, "ERROR"); })
    .on("page.error", function (msg, trace) { this.echo("Page Error: " + msg, "ERROR"); })
    .on("remote.message", function (msg) { this.echo("Info: " + msg, "INFO"); })
    .on('page.initialized', on_init)
    .start("https://www.amazon.com/gp/product/B00JNYEXCK/", function () {
        // Clicked without waiting: if the badge is absent this raises a
        // CasperError, which is reported by the "error" handler above.
        this.click('#ebooksSitbLogoImg');
        this
            .capture('lis.png')
            .wait(3000, function () {
                // Locate the preview iframe among all iframes in the document.
                // The value handed to withFrame is the DOM position + 1.
                // NOTE(review): the +1 offset is taken from the original
                // script; confirm it against the frame numbering in use.
                var index = this.evaluate(function () {
                    var frames = document.querySelectorAll('iframe');
                    var found = null;
                    for (var i = 0; i < frames.length; i++) {
                        if (frames[i].id == "sitbReaderFrame") { found = i + 1; }
                    }
                    return found;
                });
                this
                    .echo("The index is: " + index, "INFO")
                    .capture('lis_content.png');
                // Without a match there is no frame to switch into.
                if (typeof index !== 'number') {
                    this.echo("iframe#sitbReaderFrame not found; lis_content.html not written", "ERROR");
                    return;
                }
                this.withFrame(index, function () {
                    // Third argument is the open mode ('w' = overwrite), not
                    // a Unix permission number.
                    fs.write('lis_content.html', this.getHTML(), 'w');
                });
            });
    })
    .run();
&#13;
您需要使用 --cookies-file 选项,以避免被屏蔽。用法如下:
./casperjs --cookies-file=./cookies_1.txt casis.js >/dev/stdout
如果输出如下内容:
error: CasperError: Cannot dispatch mousedown event on nonexistent selector: #ebooksSitbLogoImg
说明您仍然被屏蔽了。
在这种情况下,请重新连接互联网、获取新的IP地址后再试一次。