我尝试使用以下步骤模拟Google用户查询搜索。
我可以看到引擎点击目标链接,但不幸的是,如果我尝试从第77行的页面中提取链接,78确实无效。知道我做错了什么吗?
var casper = require("casper").create({
waitTimeout: 1000,
pageSettings: {
userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:23.0) Gecko/20130404 Firefox/23.0",
loadImages: false, // The WebPage instance used by Casper will
loadPlugins: false,
verbose: true,
logLevel: "debug"
}
});
var x = require('casper').selectXPath;
casper.options.waitTimeout = 10000;
var currentPage = 1;
var links = [];
function getLinks() {
var links = document.querySelectorAll("h3.r a");
return Array.prototype.map.call(links, function(e) {
try {
return (/url\?q=(.*)&sa=U/).exec(e.getAttribute("href"))[1];
} catch (err) {
return e.getAttribute("href");
}
});
}
var terminate = function() {
this.echo("that's all, folks.").exit();
};
casper.on("http.status.200", function(resource) {
this.echo(resource.url + " is OK", "INFO");
});
casper.on("http.status.301", function(resource) {
this.echo(resource.url + " is permanently redirected", "PARAMETER");
});
casper.on("http.status.302", function(resource) {
this.echo(resource.url + " is temporarily redirected", "PARAMETER");
});
casper.on("http.status.404", function(resource) {
this.echo(resource.url + " is not found", "COMMENT");
});
casper.on("http.status.500", function(resource) {
this.echo(resource.url + " is in error", "ERROR");
});
var processCustomerPage = function() {
this.echo(this.getCurrentUrl());
};
var processPage = function() {
var url;
this.wait(10000, function() {
if (currentPage >= 5 || !this.exists("#pnnext")) {
return terminate.call(casper);
}
currentPage++;
this.echo("requesting next page: " + currentPage);
url = this.getCurrentUrl();
this.wait(3000, function() {
this.waitForSelector(x("//a[normalize-space(text())='testing - Phantom/Caperjs User input - Stack Overflow']"), function() {
this.click(x("//a[normalize-space(text())='testing - Phantom/Caperjs User input - Stack Overflow']"));
this.echo(this.getCurrentUrl());
links = this.evaluate(getLinks);
this.echo(this.getCurrentUrl());
this.echo(' [+] ' + links.join("\n - "));
this.reload(function() {
this.echo(this.getCurrentUrl());
});
this.echo("loaded again");
this.echo(this.getTitle());
return terminate.call(casper);
});
return terminate.call(casper);
});
this.echo(this.getCurrentUrl());
this.thenClick("#pnnext").then(function() {
this.waitFor(function() {
return url !== this.getCurrentUrl();
}, processPage, terminate);
});
});
};
casper.start("http://google.de/", function() {
this.fill('form[action="/search"]', {
q: 'casperjs'
}, true);
});
casper.waitForSelector('#pnnext', processPage, terminate);
casper.run();