在递归处理完页面上的所有链接之后,我很难让 CasperJS 继续翻到下一页。
我可以让它逐页翻页并从每个页面获取数据,也可以让它点击某一页上的每个链接,但我无法同时做到这两件事。
var utils = require('utils');
var x = require('casper').selectXPath;
// Create the Casper instance that drives the whole scrape.
var casper = require('casper').create({
// Print log messages in real time, limited to the 'error' level.
verbose: true,
logLevel: 'error',
// Default timeout, in milliseconds, for the wait* family of functions.
waitTimeout: 10000,
// Settings handed to the underlying PhantomJS page.
pageSettings: {
// Do not load images or plugins.
loadImages: false,
loadPlugins: false,
// Identify as a desktop Chrome browser.
userAgent: 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36'
}
});
// Shared state used by the step functions in this script.
// Number of the results page being processed; compared with the pager's `td.cur` cell.
var currentPage = 1;
// Index into `links` of the next link to visit.
var i = 0;
// hrefs collected from the current results page (filled by linkData).
var links = [];
// One { title: ... } record per visited link.
var link_titles = [];
/**
 * Prints a farewell message and shuts the script down.
 * Must be invoked with a Casper instance as `this`.
 */
var terminate = function() {
    var announced = this.echo("Exiting..");
    announced.exit();
};
/**
 * Reads the number of the currently selected results page from the pager.
 * It is passed to evaluate(), so it only uses DOM APIs.
 *
 * @returns {number} The number shown in the `td.cur` cell, or NaN when that
 *     cell is missing or its text does not start with a number.
 */
function getSelectedPage() {
    var el = document.querySelector('td.cur');
    if (!el) {
        // No pager cell found: report "unknown" instead of throwing on null.
        return NaN;
    }
    // Explicit radix: page numbers are decimal, and older engines may treat a
    // leading zero as octal when no radix is given.
    return parseInt(el.textContent, 10);
}
/**
 * Collects the target of every search-result link on the current page.
 * It is passed to evaluate(), so it only uses DOM APIs.
 *
 * @returns {Array} The raw `href` attribute of each `h3.r a` anchor, in
 *     document order.
 */
function getPageLinks () {
    var anchors = document.querySelectorAll('h3.r a');
    function hrefOf(anchor) {
        return anchor.getAttribute('href');
    }
    return Array.prototype.map.call(anchors, hrefOf);
}
/**
 * Queues a step that opens `link` and records the title of the loaded page
 * in `link_titles`.
 * Must be called with a Casper instance as `this`.
 *
 * @param {string} link URL to open.
 */
function getLinkData(link) {
    function recordTitle() {
        // Store the data scraped from the opened link.
        link_titles.push({ title: this.getTitle() });
    }
    this.thenOpen(link, recordTitle);
}
/**
 * Visits the collected links one at a time: each call handles `links[i]`,
 * advances `i`, and calls run() with itself as the callback. Once every link
 * has been handled it dumps the collected titles instead.
 * Must be called with a Casper instance as `this`.
 *
 * NOTE(review): re-entering run() presumably only works while this function
 * is itself invoked as a run() callback - confirm against the CasperJS docs.
 */
function loopThroughLinks() {
    if (!(i < links.length)) {
        // Nothing left to visit: print everything gathered so far.
        utils.dump(link_titles);
        return;
    }
    var current = links[i];
    this.echo('[LINK #' + i + '] ' + current);
    getLinkData.call(this, current);
    i++;
    this.run(loopThroughLinks);
}
/**
 * Collects the result links of the page that is currently loaded and starts
 * visiting them one by one via loopThroughLinks.
 * Must be called with a Casper instance as `this`.
 */
function linkData(){
    // Fall back to an empty list when evaluate() does not hand back a list
    // (e.g. null if the in-page function failed), so `links.length` stays safe.
    links = this.evaluate(getPageLinks) || [];
    // Restart the cursor for this page. Without the reset, `i` still points
    // past the end of the previous page's links and the new ones are skipped.
    i = 0;
    this.run(loopThroughLinks);
}
/**
 * Handles one results page: hands the page to linkData through run() and
 * queues a step that moves on to the next results page, stopping once page 3
 * has been reached.
 * Must be called with a Casper instance as `this`.
 *
 * NOTE(review): the original author reports that with the pagination step
 * queued here, the scraping started by run(linkData) is skipped and the
 * script only pages through the results; without that step the first page is
 * processed as expected. Presumably the queued step executes before run()'s
 * callback fires - confirm against the CasperJS docs before relying on it.
 */
var processPage = function() {
    // True once the pager shows the page that was just requested.
    function isRequestedPageSelected() {
        return currentPage === this.evaluate(getSelectedPage);
    }

    // Recurse into processPage when the new page is selected, give up on timeout.
    function waitForRequestedPage() {
        this.waitFor(isRequestedPageSelected, processPage, terminate);
    }

    function requestNextPage() {
        if (currentPage >= 3) {
            return terminate.call(casper);
        }
        currentPage++;
        this.echo("requesting next page: " + currentPage);
        this.capture("google-results-p" + currentPage + ".png");
        var clicked = this.thenClick('a.pn span');
        clicked.then(waitForRequestedPage);
    }

    this.run(linkData);
    this.then(requestNextPage);
};
// Open the Google results for "casperjs"; this is the first queued step.
casper.start('https://www.google.co.uk/?gws_rd=ssl#q=casperjs');
// Execute the queued steps. processPage is passed as run()'s callback, which
// CasperJS invokes once the queued steps have all been done.
casper.run(processPage);
已针对“多次调用 run()”的问题更新了代码。现在它可以循环处理第一页上的链接,但其余所有页面打印出来的仍然是第一页的结果??
var utils = require('utils');
var x = require('casper').selectXPath;
// Create the Casper instance that drives the whole scrape.
var casper = require('casper').create({
// Print log messages in real time, limited to the 'error' level.
verbose: true,
logLevel: 'error',
// Default timeout, in milliseconds, for the wait* family of functions.
waitTimeout: 10000,
// Settings handed to the underlying PhantomJS page.
pageSettings: {
// Do not load images or plugins.
loadImages: false,
loadPlugins: false,
// Identify as a desktop Chrome browser.
userAgent: 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36'
}
});
// Shared state used by the step functions in this script.
// Number of the results page being processed; compared with the pager's `td.cur` cell.
var currentPage = 1;
// Index into `links` of the next link to visit.
var i = 0;
// Link targets collected from the current results page (filled by linkData).
var links = [];
// One { title: ... } record per visited link.
var link_titles = [];
/**
 * Announces the shutdown and exits the script.
 * Must be invoked with a Casper instance as `this`.
 */
var terminate = function() {
    var afterEcho = this.echo("Exiting..");
    afterEcho.exit();
};
/**
 * Reads the number of the currently selected results page from the pager.
 * It is passed to evaluate(), so it only uses DOM APIs.
 *
 * @returns {number} The number shown in the `td.cur` cell, or NaN when that
 *     cell is missing or its text does not start with a number.
 */
function getSelectedPage() {
    var el = document.querySelector('td.cur');
    if (!el) {
        // No pager cell found: report "unknown" instead of throwing on null.
        return NaN;
    }
    // Explicit radix: page numbers are decimal, and older engines may treat a
    // leading zero as octal when no radix is given.
    return parseInt(el.textContent, 10);
}
/**
 * Collects the destination of every search-result link on the current page.
 * It is passed to evaluate(), so it only uses DOM APIs.
 *
 * Google routes result hrefs through a redirect script of its own
 * (`...url?q=<target>&sa=U...`). When an href has that shape only the
 * captured target is returned; otherwise the href is returned untouched.
 *
 * @returns {Array} One entry per `h3.r a` anchor, in document order.
 */
function getPageLinks() {
    var anchors = document.querySelectorAll("h3.r a");
    return Array.prototype.map.call(anchors, function(anchor) {
        var href = anchor.getAttribute("href");
        var redirect = (/url\?q=(.*)&sa=U/).exec(href);
        return redirect ? redirect[1] : href;
    });
}
/**
 * Queues a step that opens `link`, records the title of the loaded page in
 * `link_titles`, and then queues a step that navigates back so the results
 * page selectors are available again.
 * Must be called with a Casper instance as `this`.
 *
 * @param {string} link URL to open.
 */
function getLinkData(link) {
    // Return to the results page that the selectors expect.
    function returnToResults() {
        this.back();
    }

    function recordTitleAndReturn() {
        // Store the data scraped from the opened link.
        link_titles.push({ title: this.getTitle() });
        this.then(returnToResults);
    }

    this.thenOpen(link, recordTitleAndReturn);
}
/**
 * Visits the collected links one at a time: each call handles `links[i]`,
 * advances `i`, and queues itself again as the next step. Once every link
 * has been handled it dumps the collected titles instead.
 * Must be called with a Casper instance as `this`.
 */
function loopThroughLinks() {
    if (!(i < links.length)) {
        // Nothing left to visit: print everything gathered so far.
        utils.dump(link_titles);
        return;
    }
    var current = links[i];
    this.echo('[LINK #' + i + '] ' + current);
    getLinkData.call(this, current);
    i++;
    this.then(loopThroughLinks);
}
/**
 * Collects the result links of the page that is currently loaded and queues
 * loopThroughLinks to visit them one by one.
 * Must be called with a Casper instance as `this`.
 */
function linkData(){
    // Fall back to an empty list when evaluate() does not hand back a list
    // (e.g. null if the in-page function failed), so `links.length` stays safe.
    links = this.evaluate(getPageLinks) || [];
    // Restart the cursor for this page. Without the reset, `i` still points
    // past the end of the previous page's links, so later pages visit nothing
    // and only re-dump the titles gathered on the first page.
    i = 0;
    this.then(loopThroughLinks);
}
/**
 * Handles one results page. After a 2 second wait it queues linkData to
 * scrape the page's links; after a second 2 second wait it queues a step
 * that captures the page, clicks the "next" control and waits for the pager
 * to show the new page before processing it in turn. Stops via terminate
 * once page 3 has been reached or the wait times out.
 * Must be called with a Casper instance as `this`.
 */
var processPage = function() {
    // True once the pager shows the page that was just requested.
    function isRequestedPageSelected() {
        return currentPage === this.evaluate(getSelectedPage);
    }

    function captureAndWaitForPage() {
        this.capture('google-results-2-p' + currentPage + '.png');
        this.waitFor(isRequestedPageSelected, processPage, terminate);
    }

    function requestNextPage() {
        if (currentPage >= 3) {
            return terminate.call(casper);
        }
        this.echo("requesting next page: " + currentPage);
        this.capture("google-results-p" + currentPage + ".png");
        currentPage++;
        var clicked = this.thenClick('a.pn span');
        clicked.then(captureAndWaitForPage);
    }

    function queueLinkScraping() {
        this.then(linkData);
    }

    function queuePagination() {
        this.then(requestNextPage);
    }

    this.wait(2000, queueLinkScraping);
    this.wait(2000, queuePagination);
};
// Open the Google results for "casperjs"; this is the first queued step.
casper.start('https://www.google.co.uk/?gws_rd=ssl#q=casperjs');
// Queue processPage as a regular step of the navigation queue.
casper.then(processPage);
// The script's only run() call: executes the queued steps.
casper.run();
答案 0 :(得分:0)
您只能有一个 casper.run() 调用(并且只能有一个 casper.start() 调用)。run() 会启动 CasperJS 的步骤队列,如果队列中没有后续步骤,执行就会结束。唯一需要保留的调用是 casper.run(processPage);,其余所有的 this.run(...) 调用都需要改为 this.then(...)。

(注:下面这段 Java 代码与本问题无关,疑似在抓取时混入了答案正文,此处原样保留。)
public class UriAdpter extends TypeAdapter<Uri> {
@Override
public void write(JsonWriter out, Uri value) throws IOException {
out.value(value.toString());
}
@Override
public Uri read(JsonReader in) throws IOException {
if (in.peek() == JsonToken.NULL) {
in.nextNull();
return null;
}
return Uri.parse(in.nextString());
}
}