如何点击谷歌搜索结果中的链接并跟随跳转到目标网址

时间:2015-09-15 16:07:38

标签: javascript phantomjs casperjs

我尝试使用以下步骤模拟Google用户查询搜索。

  1. 打开谷歌搜索索引页面并发送搜索查询
  2. 通过css选择器解析链接文本
  3. 如果找到目标链接就点击它,否则点击“下一页”
  4. 我可以看到引擎点击了目标链接,但遗憾的是,当我在第 77、78 行尝试从点击后打开的页面中提取链接时,却不起作用。知道我哪里做错了吗?

    // Casper instance shared by the whole script.
    // `verbose` and `logLevel` are options of Casper itself, not of the
    // underlying WebPage, so they belong at the top level of the options
    // object; nested inside `pageSettings` they are ignored and no debug
    // output is ever produced.
    var casper = require("casper").create({
        // Default timeout, in milliseconds, applied to the waitFor*() calls.
        waitTimeout: 10000,
        verbose: true,
        logLevel: "debug",
        // Settings handed to the WebPage instance used by Casper.
        pageSettings: {
            userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:23.0) Gecko/20130404 Firefox/23.0",
            loadImages: false,
            loadPlugins: false
        }
    });
    // Factory that wraps an XPath expression so it can be used as a selector.
    var x = require('casper').selectXPath;
    // 1-based number of the result page currently being processed.
    var currentPage = 1;
    // Links collected by the most recent getLinks() evaluation.
    var links = [];
    
    
    /**
     * Collects the target of every anchor matched by "h3.r a" in the current
     * document. Intended to be passed to casper.evaluate(), so it must not
     * reference anything outside its own body.
     *
     * When an href has the form "...url?q=<target>&sa=U...", only <target> is
     * returned; any other href (including a missing one) is returned as-is.
     *
     * @returns {Array} one entry per matched anchor, in document order
     */
    function getLinks() {
        var redirectPattern = /url\?q=(.*)&sa=U/;
        var anchors = document.querySelectorAll("h3.r a");

        return Array.prototype.map.call(anchors, function(anchor) {
            var href = anchor.getAttribute("href");
            var match = redirectPattern.exec(href);
            // No match means the href is not a redirect wrapper: keep it raw.
            return match ? match[1] : href;
        });
    }
    
    
    /**
     * Prints a farewell message and exits. Must be invoked with the Casper
     * instance as `this`; exit() is called on whatever echo() returns.
     */
    var terminate = function() {
        var afterEcho = this.echo("that's all, folks.");
        afterEcho.exit();
    };
    
    // Log every HTTP response whose status is listed below.
    // Each row is [event name, message suffix, echo style].
    [
        ["http.status.200", " is OK", "INFO"],
        ["http.status.301", " is permanently redirected", "PARAMETER"],
        ["http.status.302", " is temporarily redirected", "PARAMETER"],
        ["http.status.404", " is not found", "COMMENT"],
        ["http.status.500", " is in error", "ERROR"]
    ].forEach(function(row) {
        var eventName = row[0];
        var suffix = row[1];
        var style = row[2];

        casper.on(eventName, function(resource) {
            this.echo(resource.url + suffix, style);
        });
    });
    
    /**
     * Prints the URL of the page currently loaded. Must be invoked with the
     * Casper instance as `this`.
     */
    var processCustomerPage = function() {
        var currentUrl = this.getCurrentUrl();
        this.echo(currentUrl);
    };
    
    
    var processPage = function() {
        var url;
    
        this.wait(10000, function() {
            if (currentPage >= 5 || !this.exists("#pnnext")) {
                return terminate.call(casper);
            }
    
            currentPage++;
            this.echo("requesting next page: " + currentPage);
            url = this.getCurrentUrl();
    
    
            this.wait(3000, function() {
                this.waitForSelector(x("//a[normalize-space(text())='testing - Phantom/Caperjs User input - Stack Overflow']"), function() {
                    this.click(x("//a[normalize-space(text())='testing - Phantom/Caperjs User input - Stack Overflow']"));
                    this.echo(this.getCurrentUrl());
    
                                                    links = this.evaluate(getLinks);
                            this.echo(this.getCurrentUrl());
                            this.echo(' [+] ' + links.join("\n - "));
    
                    this.reload(function() {
                        this.echo(this.getCurrentUrl());
                    });
    
                    this.echo("loaded again");
                    this.echo(this.getTitle());
                    return terminate.call(casper);
    
                });
    
    
                return terminate.call(casper);
            });
    
            this.echo(this.getCurrentUrl());
    
            this.thenClick("#pnnext").then(function() {
    
                this.waitFor(function() {
                    return url !== this.getCurrentUrl();
                }, processPage, terminate);
            });
        });
    };
    
    // Entry point: open the Google home page and submit the search form.
    casper.start("http://google.de/", function submitSearch() {
        var formValues = { q: 'casperjs' };
        var submitForm = true;
        this.fill('form[action="/search"]', formValues, submitForm);
    });

    // Start paging once the "next" button shows up; give up if it never does.
    casper.waitForSelector('#pnnext', processPage, terminate);

    // Execute the queued steps.
    casper.run();
    

0 个答案:

没有答案