CasperJS手册示例错误?

时间:2014-08-16 04:58:16

标签: casperjs

CasperJS手册中的

This example显示了如何从Google抓取网址。它表明网址会很干净。但是,当我运行此示例时,我的输出如下所示:

20 links found:
 - /url?q=http://casperjs.org/&sa=U&ei=_-TuU-KBC83-yQSu5YKQAg&ved=0CBQQFjAA&usg=AFQjCNH321k0JXrSx5WZp-fH6JwxX-O75Q
 - /url?q=http://code4fun.fr/tutoriel-casperjs/&sa=U&ei=_-TuU-KBC83-yQSu5YKQAg&ved=0CBoQFjAB&usg=AFQjCNHreU-9mg7OZxK3TOl94HDPOnA_aQ
 - /url?q=http://casperjs.readthedocs.org/&sa=U&ei=_-TuU-KBC83-yQSu5YKQAg&ved=0CCEQFjAC&usg=AFQjCNGzX6V5ZQtmCwHwZerHR3ftK3pHOw
 - /url?q=https://github.com/n1k0/casperjs&sa=U&ei=_-TuU-KBC83-yQSu5YKQAg&ved=0CCcQFjAD&usg=AFQjCNEiGMDpYiPm1qXK7ZxDCwWwKjAStg
 - /url?q=http://www.technologies-ebusiness.com/enjeux-et-tendances/casperjs-pour-des-tests-d-integration&sa=U&ei=_-TuU-KBC83-yQSu5YKQAg&ved=0CC4QFjAE&usg=AFQjCNFOGl1p6ApqP8TmAxhtQp33DHpbcQ
 - /url?q=https://www.lullabot.com/blog/article/testing-front-end-casperjs&sa=U&ei=_-TuU-KBC83-yQSu5YKQAg&ved=0CDQQFjAF&usg=AFQjCNG53ZxHl8yZ0JGdzNbwKuZmPOLqCg
 - /url?q=http://blog.newrelic.com/2013/06/04/simpler-ui-testing-with-casperjs-2/&sa=U&ei=_-TuU-KBC83-yQSu5YKQAg&ved=0CDoQFjAG&usg=AFQjCNFzlDb7R4Uv-jj_3S5IbJUpKF-7fA
 - /url?q=https://www.npmjs.org/package/grunt-casperjs&sa=U&ei=_-TuU-KBC83-yQSu5YKQAg&ved=0CEEQFjAH&usg=AFQjCNGn-dwJpkX_XTQv8YnFZTClcLosJA
 - /url?q=http://www.phase2technology.com/blog/behavorial-test-for-custom-entity-using-casperjs/&sa=U&ei=_-TuU-KBC83-yQSu5YKQAg&ved=0CEcQFjAI&usg=AFQjCNFG0KDAADmocesrDoqQTHW6PPO8KQ
 - /url?q=http://blog.codeship.io/2013/03/07/smoke-testing-with-casperjs.html&sa=U&ei=_-TuU-KBC83-yQSu5YKQAg&ved=0CE4QFjAJ&usg=AFQjCNG5AsT2iKCnN-utrCGsthCZCpYKaQ
 - /url?q=http://phantomjs.org/&sa=U&ei=_-TuU9yhG4iZyASb8oK4Dw&ved=0CBQQFjAA&usg=AFQjCNGXz7tw-UkfDOpqvYV89KlcJPGfHQ
 - /url?q=http://phantomjs.org/download.html&sa=U&ei=_-TuU9yhG4iZyASb8oK4Dw&ved=0CB8QFjAB&usg=AFQjCNG_czKcYiFKskAvoRl1CceXuTJecA
 - /url?q=http://www.mathieurobin.com/2013/04/phantomjs-chargez-et-jouez-avec-vos-sites-en-js-sans-quitter-la-console/&sa=U&ei=_-TuU9yhG4iZyASb8oK4Dw&ved=0CCUQFjAC&usg=AFQjCNEAtYz0zcsRVYy-37U9sJL7e9EqYQ
 - /url?q=http://svay.com/blog/paris-js-10-introduction-a-phantomjs-un-navigateur-webkit-headless/&sa=U&ei=_-TuU9yhG4iZyASb8oK4Dw&ved=0CCsQFjAD&usg=AFQjCNE9dUuVQmNpK064a9GPJyOIetUWAA
 - /url?q=https://github.com/ariya/phantomjs&sa=U&ei=_-TuU9yhG4iZyASb8oK4Dw&ved=0CDIQFjAE&usg=AFQjCNErqnWYxIVwBwXeUjaSd4SFicQqpw
 - /url?q=https://github.com/gruntjs/grunt-lib-phantomjs&sa=U&ei=_-TuU9yhG4iZyASb8oK4Dw&ved=0CDkQFjAF&usg=AFQjCNHkRVx926JJkKhdoKxKsKVcQc-QTg
 - /url?q=http://blog.octo.com/seo-spa-angular/&sa=U&ei=_-TuU9yhG4iZyASb8oK4Dw&ved=0CD8QFjAG&usg=AFQjCNFcj-ykUo-rSQKlcEZIy1qjSlW-oQ
 - /url?q=https://www.npmjs.org/package/phantomjs&sa=U&ei=_-TuU9yhG4iZyASb8oK4Dw&ved=0CEYQFjAH&usg=AFQjCNGweWRdm8qjqxOOybFgtz5B8CnMDQ
 - /url?q=http://code.google.com/p/phantomjs/&sa=U&ei=_-TuU9yhG4iZyASb8oK4Dw&ved=0CEwQFjAI&usg=AFQjCNEwvNx7NNMDAaiqHZ_y-3Bbf62W_w
 - /url?q=http://casperjs.org/&sa=U&ei=_-TuU9yhG4iZyASb8oK4Dw&ved=0CE4QFjAJ&usg=AFQjCNGNEKkl1eWaFx9Sz6R7ZFVN9r1Bhw

这是他们运行的代码:

var links = [];
var casper = require('casper').create();

function getLinks() {
    var links = document.querySelectorAll('h3.r a');
    return Array.prototype.map.call(links, function(e) {
        return e.getAttribute('href');
    });
}

casper.start('http://google.fr/', function() {
    // search for 'casperjs' from google form
    this.fill('form[action="/search"]', { q: 'casperjs' }, true);
});

casper.then(function() {
    // aggregate results for the 'casperjs' search
    links = this.evaluate(getLinks);
    // now search for 'phantomjs' by filling the form again
    this.fill('form[action="/search"]', { q: 'phantomjs' }, true);
});

casper.then(function() {
    // aggregate results for the 'phantomjs' search
    links = links.concat(this.evaluate(getLinks));
});

casper.run(function() {
    // echo results in some pretty fashion
    this.echo(links.length + ' links found:');
    this.echo(' - ' + links.join('\n - ')).exit();
});

有人可以解释发生了什么以及为什么我的网址不像他们那样干净吗?

2 个答案:

答案 0 :(得分:0)

这个例子很好,因为你得到了网址,但有一点点噪音。看起来google同时改变了hrefs。所以你可以添加

links = links.map(function(link){
    return link.substring(0, link.indexOf("&sa=U&ei=")).replace("/url?q=", "");
});

join最后一步中的链接之前。

答案 1 :(得分:0)

我实际上找到了一个使用名为getLinks()的函数的工作。这很好用,应该更适合我需要的东西。使用split()pop()的组合可以让您获得所需内容。

代码:

   function getLinks() {
    var links = document.querySelectorAll('h3.r a');
    return Array.prototype.map.call(links, function(e) {
        return e.getAttribute('href').split('/').pop();
    });
}

casper.start(googleSearch, function() {
    links = this.evaluate(getLinks);
});

casper.run(function() {
    // echo results in some pretty fashion
    this.echo(links.length + ' links found:');
    this.echo(links.join('\n')).exit();
});