我正在尝试使用 node-crawler 项目(https://github.com/sylvinus/node-crawler)。
下面是我使用的代码(取自该项目的文档):
// Minimal link-following crawler: fetch one seed page and queue every
// anchor href found on each page that comes back.
var Crawler = require("crawler");
// NOTE(review): `url` is required but never used in this snippet.
var url = require('url');
var c = new Crawler({
// Allow at most 10 connections.
maxConnections : 10,
// This will be called for each crawled page.
// NOTE(review): `error` is never inspected and `$` is invoked
// unconditionally. The stack trace reported with this snippet points into
// this callback, so `$` is presumably not a function when it runs; confirm
// the callback signature of the installed crawler version.
callback : function (error, result, $) {
// $ is expected to be Cheerio by default:
// a lean implementation of core jQuery designed specifically for the server
$('a').each(function(index, a) {
// Read the link target of the current anchor and queue it as-is.
// NOTE(review): presumably undefined for anchors without an href, and
// relative links are not resolved before queueing; verify.
var toQueueUrl = $(a).attr('href');
c.queue(toQueueUrl);
});
}
});
// Queue just one URL, with default callback
c.queue('http://joshfire.com');
当我运行这段代码时,它抛出了以下错误:
TypeError: undefined is not a function
at Object.Crawler.callback (/home/app/app.js:10:9)
at Crawler._onContent (/home/app/node_modules/crawler/lib/crawler.js:461:17)
at Request._callback (/home/app/node_modules/crawler/lib/crawler.js:351:18)
at Request.self.callback (/home/app/node_modules/crawler/node_modules/request/request.js:236:22)
at Request.emit (events.js:98:17)
at Request.<anonymous> (/home/app/node_modules/crawler/node_modules/request/request.js:1142:14)
at Request.emit (events.js:117:20)
at IncomingMessage.<anonymous> (/home/app/node_modules/crawler/node_modules/request/request.js:1096:12)
at IncomingMessage.emit (events.js:117:20)
at _stream_readable.js:944:16
我使用的 Node.js 版本:v0.10.37
答案 0 :(得分:0)
// Update Node.js and node-crawler, then use this in place of your callback
// function. In current node-crawler releases the callback receives
// (error, res, done): the Cheerio handle is exposed as res.$, and done() must
// be called once the page has been handled so the crawler can move on to the
// next queued task.
callback : function (error, res, done) {
    // A failed request has nothing to parse; report it and release the task.
    if (error) {
        console.error(error);
        done();
        return;
    }
    var $ = res.$;
    // Only walk the links when a selector was actually attached to the
    // response (presumably absent for bodies that were not parsed as HTML).
    if (typeof $ === 'function') {
        $('a').each(function (index, a) {
            var toQueueUrl = $(a).attr('href');
            // Anchors without an href yield undefined; do not queue those.
            if (toQueueUrl) {
                c.queue(toQueueUrl);
            }
        });
    }
    // Always signal completion, otherwise the queue stalls.
    done();
}