我无法点击DOM中所有基于JavaScript的链接并保存输出。链接的格式为:
<a id="html" href="javascript:void(0);" onclick="goToHtml();">HTML</a>
以下代码效果很好:
// Clicks a single JavaScript-driven link, waits until the browser has left the
// start page, and then saves the HTML of the page it arrived on.
var casper = require('casper').create();
var fs = require('fs');
var startUrl = 'http://www.testurl.com/test.html';
var linkSelector = '#jan_html';

casper.start(startUrl);

casper.thenClick(linkSelector, function(){
    console.log("whoop");
});

casper.waitFor(function check() {
    // The click counts as finished once the URL no longer matches the start page.
    return this.getCurrentUrl() !== startUrl;
}, function then() {
    var landedUrl = this.getCurrentUrl();
    console.log(landedUrl);
    // Output file name: the page title with every space turned into an underscore.
    var outputName = this.getTitle().replace(/ /g, '_') + '.html';
    fs.write(outputName, this.getPageContent());
});

casper.run();
但是,我如何才能使用“a”选择器点击全部可用的链接并保存内容?我不知道如何让clickWhileSelector从选择器中删除节点,如这里所述:Click on all links matching a selector
答案 0 :(得分:14)
我有这个脚本:它首先从页面获取所有链接,并将'href'属性保存到数组,然后迭代这个数组,逐个打开每个链接并回显网址:
// Collects the href of every anchor on the start page, then opens each URL in
// turn and echoes the address the browser actually ended up on.
var casper = require('casper').create({
    // Casper has no "debug" option and no "verbose" log level: `verbose`
    // switches on realtime log output and `logLevel` sets the threshold.
    verbose: true,
    logLevel: "debug"
});
// Filled in by the getLinks step; read by the step that follows it.
var links;

casper.start('http://localhost:8000');

casper.then(function getLinks(){
    // The callback given to evaluate() runs inside the page, so it returns
    // plain strings rather than DOM nodes.
    links = this.evaluate(function(){
        var anchors = document.getElementsByTagName('a');
        return Array.prototype.map.call(anchors, function(anchor){
            return anchor.getAttribute('href');
        }).filter(function(href){
            // Anchors without an href attribute yield null, which cannot be opened.
            return typeof href === 'string';
        });
    });
});

casper.then(function(){
    this.each(links, function(self, link){
        self.thenOpen(link, function(){
            this.echo(this.getCurrentUrl());
        });
    });
});

casper.run(function(){
    this.exit();
});
答案 1 :(得分:1)
如果链接具有真实的href属性(实际网址),则rusln's answer效果很好。如果你想点击同时触发javascript函数的每个a元素,你可能需要用其他方式在元素上迭代。
我建议使用XPath generator from stijn de ryck for an element。
先为页面上的每个a元素生成XPath,然后为每个a打开页面,并通过XPath单击它。
var startURL = 'http://localhost:8000',
// Continuation of the `var` statement started on the previous line.
// xPaths is assigned by the getLinks step and read by the click step.
xPaths,
// selectXPath wraps an XPath string in a selector object for thenClick().
x = require('casper').selectXPath,
// Casper instance that every step in this script is registered on.
casper = require('casper').create();
casper.start(startURL);
casper.then(function getLinks(){
xPaths = this.evaluate(function(){
// copied from https://stackoverflow.com/a/5178132/1816580
/**
 * Builds an XPath expression that locates the given element.
 *
 * Walks from the element up through its element ancestors and emits one path
 * segment per level:
 *   - an id that is unique in the document short-circuits to `id("...")`;
 *   - a non-unique id becomes `tag[@id="..."]`;
 *   - otherwise a class attribute becomes `tag[@class="..."]`;
 *   - otherwise the 1-based position among preceding same-tag siblings is used.
 *
 * Written in ES5 (`var`) because it is defined inside a callback that is
 * evaluated in the page context.
 *
 * @param {Element} elm - Element to describe; falsy input yields null.
 * @returns {?string} The XPath, or null when no segment could be produced.
 */
function createXPathFromElement(elm) {
    var allNodes = document.getElementsByTagName('*');
    var segs = [];
    // Declared locally: the loop counters used to leak as implicit globals.
    var uniqueIdCount, n, i, sib;

    for (; elm && elm.nodeType === 1; elm = elm.parentNode) {
        if (elm.hasAttribute('id')) {
            // Count elements sharing this id, stopping as soon as a duplicate is seen.
            uniqueIdCount = 0;
            for (n = 0; n < allNodes.length; n++) {
                if (allNodes[n].hasAttribute('id') && allNodes[n].id === elm.id) uniqueIdCount++;
                if (uniqueIdCount > 1) break;
            }
            if (uniqueIdCount === 1) {
                // A unique id addresses the element directly; no ancestors needed.
                segs.unshift('id("' + elm.getAttribute('id') + '")');
                return segs.join('/');
            }
            segs.unshift(elm.localName.toLowerCase() + '[@id="' + elm.getAttribute('id') + '"]');
        } else if (elm.hasAttribute('class')) {
            segs.unshift(elm.localName.toLowerCase() + '[@class="' + elm.getAttribute('class') + '"]');
        } else {
            // Position = 1 + number of preceding siblings with the same tag name.
            for (i = 1, sib = elm.previousSibling; sib; sib = sib.previousSibling) {
                if (sib.localName === elm.localName) i++;
            }
            segs.unshift(elm.localName.toLowerCase() + '[' + i + ']');
        }
    }
    return segs.length ? '/' + segs.join('/') : null;
}
var links = document.getElementsByTagName('a');
var xPaths = Array.prototype.map.call(links, createXPathFromElement);
return xPaths;
});
});
// For each collected XPath: reload the start page, click the matching link,
// pause briefly, then report which URL the browser is on.
casper.then(function(){
    this.each(xPaths, function(instance, linkXPath){
        instance.thenOpen(startURL);
        instance.thenClick(x(linkXPath));
        // Single page applications may need a moment to react to the click.
        instance.wait(1000);
        instance.then(function(){
            // Replace this with whatever should happen after each click.
            this.echo(this.getCurrentUrl());
        });
        // If each click opens a new page instead of staying on the same one,
        // uncomment the following line:
        //instance.back()
    });
});

// Quit once every queued step has finished.
casper.run(function(){
    this.exit();
});