我正在使用NodeJs的抓取工具
这是我的代码:
var Crawler = require("crawler");
//var jsdom = require('jsdom');
var url = require('url');
var fs = require('fs');
if (typeof String.prototype.startsWith != 'function') {
// see below for better implementation!
console.log("added");
String.prototype.startsWith = function (str){
return this.indexOf(str) == 0;
};
}
var c = new Crawler({
maxConnections: 10,
// This will be called for each crawled page
callback: function (error, result,$) {
// $ is Cheerio by default
//a lean implementation of core jQuery designed specifically for the server
if(result.request.uri.href.startsWith("http://www.geocaching.com/geocache/")){
var titel = $('#ctl00_ContentBody_CacheName');
var coords = $('#uxLatLon');
console.log(titel +": "+ coords);
}
$('a').each(function(index, a) {
var toQueueUrl = $(a).attr('href');
c.queue(toQueueUrl);
});
}
});
c.queue('http://www.geocaching.com/');
但在运行一段时间后,我收到此错误:
TypeError: undefined is not a function
at Object.Crawler.callback (C:\Users\Lukas\Documents\Geocachcrawler\app.js:27:9)
at Crawler._onContent (C:\Users\Lukas\Documents\Geocachcrawler\node_modules\crawler\lib\crawler.js:462:17)
at Request._callback (C:\Users\Lukas\Documents\Geocachcrawler\node_modules\crawler\lib\crawler.js:352:18)
at Request.self.callback (C:\Users\Lukas\Documents\Geocachcrawler\node_modules\crawler\node_modules\request\request.js:236:22)
at Request.emit (events.js:98:17)
at Request.<anonymous> (C:\Users\Lukas\Documents\Geocachcrawler\node_modules\crawler\node_modules\request\request.js:1142:14)
at Request.emit (events.js:117:20)
at IncomingMessage.<anonymous> (C:\Users\Lukas\Documents\Geocachcrawler\node_modules\crawler\node_modules\request\request.js:1096:12)
at IncomingMessage.emit (events.js:117:20)
at _stream_readable.js:943:16
答案 0 :(得分:2)
问题在于这一行:
$('a').each(function(index, a) {
您正在访问没有a
标记的页面,因此jQuery对象为空,并且无法在其上运行函数。在运行每个函数之前,您必须检查以确保它不是空的。
var a = $('a');
if(a.length != 0){
$('a').each(function(index, a) {
var toQueueUrl = $(a).attr('href');
c.queue(toQueueUrl);
});
}
更新:我可能不正确,JSfiddle doesn't throw this error with jQuery 1.11.0。您使用的是什么版本的jQuery?
编辑:你确定要包含jQuery吗?它可能会在选择器上抛出错误。