我在市场上买一辆新车。我没有反复搜索经销商网站,而是认为这将是一个有趣而有趣的机会,可以学习一个小节点和mongodb,所以我正在抓我当地经销商的网站去抓取我感兴趣的品牌和型号。
我遇到的问题是节点在我的最终回调运行后不会终止。
var cheerio = require('cheerio');
var request = require('request');
var db = require('mongodb');
var S = require('string');
var log = require('console').log;
var async = require('async');
var links = [];
var website = 'http://www.yahoo.com';
async.series(
[
function(){
log('starting');
db.connect('mongodb://127.0.0.1:27017/test',
function(err, base){
if(err) throw err;
db = base;
});
},
request(website, start)
],
function(){
log('closing DB');
db.close();
});
function start(err,resp,body){
var $ = cheerio.load(body);
var numPages = 2;
$('.gbps').each(function(i,elem) {
links.push('http://www.yahoo.com');
});
var pageURLS = [];
for (var i = 2; i<=numPages; i++){
//create URLs for additional pages
pageURLS[i-2] = website;
}
var pages = 1;
log('getting page URLs');
pageURLS.forEach(function(url, index, array){
request(url, function(error,response,bodies) {
pages++;
var $ = cheerio.load(bodies);
$('.tab').each(function(i,elem) {
links.push('http://www.yahoo.com');
});
if (pages == numPages){
getDetailInfo();
};
});
});
}
function getDetailInfo(){
log(links.length);
links.forEach(function(link, index, array){
request(link, doStuff);
});
}
function doStuff(err, response, body){
if(err){
log(err);
}
parseDetailResponse(err,response,body, addToDB);
}
function parseDetailResponse(err,resp,body,callback){
log('parsing');
var $ = cheerio.load(body);
var specs = $('.specifications').children().map(function(i, elem){
var key = 'key';
var value = 'value';
var ret = {};
ret [ 'name' ] = key;
ret [ 'value' ] = value;
return ret;
});
var makeAndModel = 'makeAndModel';
callback(['picture url', 'vehicle description', 100, specs, makeAndModel]);
}
function getMakeAndModel(stuff){
var $ = cheerio.load(stuff);
temp = $('.gbps').map(function(i, elem){
var ret = {};
switch(i){
case 0:
ret['name'] = 'year';
ret['value'] = $(this).text();
break;
case 1:
ret['name'] = 'make';
ret['value'] = $(this).text();
break;
case 2:
ret['name'] = 'model';
ret['value'] = $(this).text();
break;
case 3:
ret['name'] = 'ignore';
ret['value'] = $(this).text();
break;
default:
ret['name'] = 'ignore';
ret['value'] = 'ignore';
}
return ret;
});
return temp;
}
function addToDB(arr){
log('adding to DB');
pic = arr[0];
description = arr[1];
price = arr[2];
specs = arr[3];
makeAndModel = arr[4];
var obj = {};
for (var i = specs.length - 1; i >= 0; i--) {
obj [specs[i].name] = specs[i].value;
};
for (var i = makeAndModel.length - 1; i >= 0; i--){
obj [makeAndModel[i].name] = makeAndModel[i].value;
};
db.collection('carsTest').update(
{VIN: obj.VIN},
{
$set: {
VIN: obj.VIN,
make: obj.make,
model: obj.model,
year: obj.year,
price: price,
engine: obj.Engine,
interior: obj.Interior,
exterior: obj.Exterior,
'model code': obj['Model Code'],
'stock number': S(obj['Stock Number']).toInt(),
transmission: obj.Transmission,
mileage: obj.Mileage ? obj.Mileage : 0,
description: description,
picture: pic,
}
},
{upsert: true, safe: true},
function(err,result){
if(err){
throw err;
}
});
log('finished with this one!');
}
我省略并更改了相当数量作为证据,没有进行大量错误检查或其他任何事情,但即使这样也会添加文档但不会退出。 Node只是坐在那里,等待发生的事情,它永远不会调用最终的回调来关闭数据库并退出。
> db.carsTest.find().pretty()
{
"_id" : ObjectId("52139aa7c9b7a39e0f1eb61d"),
"VIN" : null,
"description" : "vehicle description",
"engine" : null,
"exterior" : null,
"interior" : null,
"make" : null,
"mileage" : 0,
"model" : null,
"model code" : null,
"picture" : "picture url",
"price" : 100,
"stock number" : NaN,
"transmission" : null,
"year" : null
}
答案 0 :(得分:2)
我认为你误解了async.series
的工作原理。
async.series
中的函数不会将callback
作为参数,并且不会调用它。 request(...)
这些东西可能根本就不是一个功能。这可能就是它打破异步循环的原因。试试这个:
async.series(
[
function(callback) { // <--- missing callback
log('starting');
db.connect('mongodb://127.0.0.1:27017/test',
function(err, base){
if(err) throw err;
db = base;
callback(); // <--- missing callback
});
},
function(callback) { // <--- missing function with callback
request(website, function(err,resp,body) {
start(err, resp, body, callback);
})
}
],
function(){
log('closing DB');
db.close();
}
);
请注意,我在调用callback
时添加了start
参数。因此,您必须巧妙地重构代码,以便每个函数都接受callback
,当您知道所有作业都已完成时,可以在最后调用它。例如,您可以在async.parallel
中添加start
,此函数可能如下所示:
function start(err, resp, body, callback) {
// some stuff happens here
var jobs = []
pageURLS.forEach(function(url, index, array){
jobs.push(function(clb) {
request(url, function(error,response,bodies) {
// some stuff
clb(); // <--- this refers to the local callback for the job
});
});
});
async.parallel(jobs, function() {
// all jobs are done, let's finilize everything
callback();
});
};