NodeJS crawler undefined不是一个函数

时间:2014-11-15 18:20:04

标签: javascript jquery node.js web-crawler

我正在使用NodeJs的抓取工具

这是我的代码:

var Crawler = require("crawler");
//var jsdom = require('jsdom');
var url = require('url');
var fs = require('fs');

if (typeof String.prototype.startsWith != 'function') {
  // see below for better implementation!
  console.log("added");
  String.prototype.startsWith = function (str){
    return this.indexOf(str) == 0;
  };
}

var c = new Crawler({
    maxConnections: 10,

    // This will be called for each crawled page
    callback: function (error, result,$) {
        // $ is Cheerio by default
        //a lean implementation of core jQuery designed specifically for the server
        if(result.request.uri.href.startsWith("http://www.geocaching.com/geocache/")){
            var titel = $('#ctl00_ContentBody_CacheName');
            var coords = $('#uxLatLon');

            console.log(titel +": "+ coords);
        }
        $('a').each(function(index, a) {
            var toQueueUrl = $(a).attr('href');
            c.queue(toQueueUrl);
        });

    }
});

c.queue('http://www.geocaching.com/');

但在运行一段时间后,我收到此错误:

TypeError: undefined is not a function
    at Object.Crawler.callback (C:\Users\Lukas\Documents\Geocachcrawler\app.js:27:9)
    at Crawler._onContent (C:\Users\Lukas\Documents\Geocachcrawler\node_modules\crawler\lib\crawler.js:462:17)
    at Request._callback (C:\Users\Lukas\Documents\Geocachcrawler\node_modules\crawler\lib\crawler.js:352:18)
    at Request.self.callback (C:\Users\Lukas\Documents\Geocachcrawler\node_modules\crawler\node_modules\request\request.js:236:22)
    at Request.emit (events.js:98:17)
    at Request.<anonymous> (C:\Users\Lukas\Documents\Geocachcrawler\node_modules\crawler\node_modules\request\request.js:1142:14)
    at Request.emit (events.js:117:20)
    at IncomingMessage.<anonymous> (C:\Users\Lukas\Documents\Geocachcrawler\node_modules\crawler\node_modules\request\request.js:1096:12)
    at IncomingMessage.emit (events.js:117:20)
    at _stream_readable.js:943:16

1 个答案:

答案 0 :(得分:2)

问题在于这一行:

$('a').each(function(index, a) {

您正在访问没有a标记的页面,因此jQuery对象为空,并且无法在其上运行函数。在运行每个函数之前,您必须检查以确保它不是空的。

var a = $('a');
if(a.length != 0){
  $('a').each(function(index, a) {
        var toQueueUrl = $(a).attr('href');
        c.queue(toQueueUrl);
    });
}

更新:我可能不正确,JSfiddle doesn't throw this error with jQuery 1.11.0。您使用的是什么版本的jQuery?

编辑:你确定要包含jQuery吗?它可能会在选择器上抛出错误。