我终于弄清楚回调如何在node.js中运行,但我现在正在尝试让我的代码按顺序执行。
目标是(按顺序)处理:
每个 <tbody> 中的每个 <td>。
最终目标是遍历每个页面(每个日期都有一个单独的 URL,因此我按日期循环),并把数据库中尚不存在的球员插入一次,且只插入一次。问题是,它在执行任何 INSERT 查询之前就先跑完了所有的 SELECT 查询,因此同一个球员会被插入多次。
这是我正在解析的页面,如果它有帮助:http://www.basketball-reference.com/friv/dailyleaders.cgi?month=12&day=29&year=2014
这是我的代码:
/**
 * Requests `url`, parses the response body with cheerio and appends the text
 * of every cell selected by `$('td', 'tbody')` to the outer `data` array.
 * `callback(data)` is invoked only if `rowsRemaining` equals 0 after parsing.
 *
 * `data` and `rowsRemaining` are not declared in this function; they are read
 * and written as outer variables.
 *
 * @param {string} url - Address passed to `request`.
 * @param {Function} callback - Receives the outer `data` array.
 */
function loadPage (url, callback){
request(url, function(err, response, body){
// Only parse when the request reported no error and the status code is 200.
if(!err && response.statusCode ==200){
var $ = cheerio.load(body);
// NOTE(review): `$` is used as a function below, so `$.length` is that
// function's own `length` property rather than the number of selected cells.
// Confirm this is the intended starting value for the countdown.
rowsRemaining = $.length;
$('td', 'tbody').each(function(){
var text = $(this).text();
data.push(text);
// Decremented once per visited cell; nothing stops it from going below zero.
rowsRemaining -= 1;
console.log('rows left: ',rowsRemaining);
});
}
// This check sits outside the success branch, so it also runs after a failed
// request. NOTE(review): in that case `$` was never assigned, so the
// `$.length` access below would throw if this branch is entered.
if (rowsRemaining == 0){
console.log('$ length: ',$.length);
callback(data);
}
});
}
/**
 * Visits `data` at indexes 1, 27, 53, ... (stride 26) and passes each visited
 * entry, with its first `'` removed, to `lookForPlayer`.
 * `callback()` is called as soon as the loop has issued all lookups; it does
 * not wait for the `lookForPlayer` callbacks.
 *
 * @param {Array<string>} data - Flat list of cell texts.
 * @param {Function} callback - Called with no arguments after the loop.
 */
function loopThroughData (data, callback){
// `i` is not declared here, so it is a variable shared with the callbacks below.
for(i=1;i<data.length;i+=26){
// A string pattern in `replace` only replaces the first occurrence of `'`.
lookForPlayer(data[i].replace("'",""),function(name){
/* var insertPlayer = connection.query(
'INSERT INTO player (provider_id, team_id, position_id, name) VALUES (1, (SELECT id FROM team WHERE slug = "'+data[i+1]+'"),1,"'+name+'");',function(err,result,fields){
}); */
// NOTE(review): `i` is read when this callback runs, not when the lookup was
// issued. If `lookForPlayer` calls back asynchronously, the loop has already
// finished and `i` holds its final value here. Confirm.
console.log('i is currently = ',i);
});
}
callback();
}
/**
 * Looks `name` up in the `player` table and reports it when no row matches.
 *
 * The name is sent as a bound `?` placeholder value instead of being spliced
 * into the SQL text, so quotes inside a scraped name can neither break nor
 * alter the statement.
 *
 * @param {string} name - Player name to search for.
 * @param {function(string): void} callback - Invoked with `name` only when the
 *   query returns zero rows; it is not invoked when the player already exists.
 * @throws Rethrows, from inside the query callback, any error the query reports.
 */
function lookForPlayer(name, callback){
  console.log('Looking for Player...');
  connection.query(
    'SELECT * FROM player WHERE name = ?',
    [name],
    function(err, rows){
      if(err) throw err;
      // Zero rows means the player is not stored yet.
      if(rows.length === 0){
        callback(name);
      }
    }
  );
}
//loop through every day since the season started
// `d` is assigned `seasonStart` itself (no copy) and is advanced one day per
// iteration through `setDate`, until it passes the current time.
for (d = seasonStart; d <= Date.now(); d.setDate(d.getDate() + 1)){
console.log('d = ',d);
// The URL is baseURL + (month index + 1) + '&day=' + day + '&year=' + year.
// The loop does not wait for loadPage's callback before moving to the next
// date, so every request is issued before any callback has to have run.
loadPage(baseURL+(d.getMonth()+1)+'&day='+d.getDate()+'&year='+d.getFullYear(),function(data){
console.log('Page loaded...');
// The completion callback passed to loopThroughData is intentionally empty.
loopThroughData(data,function(){
});
});
}
正如您所看到的,我尝试添加了一个 rowsRemaining 变量,目的是确保在调用 loadPage 函数中的回调之前已经解析完整个页面,但代码永远执行不到那一步。请注意,其中很多变量(rowsRemaining、data 等)都是我在这些函数之前初始化的。
此外,在第一个页面被完全加载、解析并插入之前,它似乎就已经循环遍历了所有日期,而这不是它应有的行为。
以下是基于 @Brant 的答案更新后的代码:
/**
 * Requests `url`, and on a successful response appends the text of every cell
 * selected by `$('td', 'tbody')` to the outer `data` array. `callback(data)`
 * is then called once per request, whether or not the request succeeded.
 *
 * @param {string} url - Address passed to `request`.
 * @param {Function} callback - Receives the outer `data` array.
 */
function loadPage (url, callback){
request(url, function(err, response, body){
// Only parse when the request reported no error and the status code is 200.
if(!err && response.statusCode ==200){
var $ = cheerio.load(body);
console.log(url);
$('td', 'tbody').each(function(){
var text = $(this).text();
// NOTE(review): `data` is not declared in this function, so cells from every
// call are appended to the same array unless it is reset elsewhere. Confirm.
data.push(text);
});
}
// Reached on failure too; `data` then holds whatever was pushed earlier.
callback(data);
});
}
/**
 * Visits `data` at indexes 1, 27, 53, ... (stride 26), passes each visited
 * entry (first `'` removed) to `lookForPlayer`, and issues an INSERT from the
 * `lookForPlayer` callback. `callback(data)` is called right after the loop,
 * without waiting for any lookup or insert to finish.
 *
 * @param {Array<string>} data - Flat list of cell texts.
 * @param {Function} callback - Called with `data` after the loop.
 */
function loopThroughData (data, callback){
// `i` is not declared here, so it is a variable shared with the callbacks below.
for(i=1;i<data.length;i+=26){
// A string pattern in `replace` only replaces the first occurrence of `'`.
lookForPlayer(data[i].replace("'",""),function(name){
// NOTE(review): `data[i+1]` is evaluated when this callback runs. If
// `lookForPlayer` calls back asynchronously, `i` already holds the loop's
// final value and `data[i+1]` may be undefined. Confirm.
// NOTE(review): `data[i+1]` and `name` are concatenated into the SQL text
// unescaped, and the query callback ignores `err`.
var insertPlayer = connection.query(
'INSERT INTO player (provider_id, team_id, position_id, name) VALUES (1, (SELECT id FROM team WHERE slug = "'+data[i+1]+'"),1,"'+name+'");',function(err,result,fields){
});
});
}
callback(data);
}
/**
 * Looks `name` up in the `player` table and reports it when no row matches.
 *
 * The name is sent as a bound `?` placeholder value instead of being spliced
 * into the SQL text, so quotes inside a scraped name can neither break nor
 * alter the statement.
 *
 * @param {string} name - Player name to search for.
 * @param {function(string): void} callback - Invoked with `name` only when the
 *   query returns zero rows; it is not invoked when the player already exists.
 * @throws Rethrows, from inside the query callback, any error the query reports.
 */
function lookForPlayer(name, callback){
  connection.query(
    'SELECT * FROM player WHERE name = ?',
    [name],
    function(err, rows){
      if(err) throw err;
      // Zero rows means the player is not stored yet.
      if(rows.length === 0){
        console.log(name,' was not found in DB!');
        callback(name);
      }
    }
  );
}
//loop through every day since the season started
// NOTE(review): `d` is assigned `seasonStart` itself and `setDate` mutates it
// in place, so every `push(d)` stores a reference to the same object. Assuming
// `seasonStart` is a Date, all entries end up showing the final date. Confirm.
for (d = seasonStart; d <= Date.now(); d.setDate(d.getDate() + 1)){
validDatesArr.push(d);
}
// Runs the iterator for one array element at a time; the next element starts
// only after the iterator's `callback` has been called.
async.eachSeries(validDatesArr,
// The parameter shadows the outer array: inside this function `validDatesArr`
// is a single element of it.
function(validDatesArr, callback){
// NOTE(review): `+` is evaluated left to right, so after the string operand
// `getMonth()+1` appends the text "1" to the month index (index 11 becomes
// "111") instead of adding 1 numerically. `(getMonth()+1)` was probably intended.
loadPage(baseURL+'/month='+validDatesArr.getMonth()+1+'&day='+validDatesArr.getDate()+'&year='+validDatesArr.getFullYear(),function(data){
loopThroughData(data, function(){
// Signals eachSeries to continue once loopThroughData has called back.
callback();
});
});
}, function(err){
// Final callback: logs only when no error was reported.
if(!err){
console.log('We processed each date requests one by one');
}
}
);
所以现在它会逐个加载页面,但并没有针对这些数据执行 loopThroughData 函数中的 INSERT。我本来想在 async 的函数列表中再添加一个函数,但这里的写法是调用已有的具名函数,而不是使用匿名函数。
答案 0 :(得分:1)
修改你的for循环如下:
//loop through every day since the season started
var validDatesArr = [];
// Walk a copy of `seasonStart` so the original value is not advanced, and push
// a fresh Date for each day. `setDate` mutates the Date in place, so pushing
// `d` itself would fill the array with references to one object that all end
// up showing the final date.
for (var d = new Date(seasonStart); d <= Date.now(); d.setDate(d.getDate() + 1)){
  validDatesArr.push(new Date(d));
}
// Process one date at a time: the next page is requested only after the
// previous page's loopThroughData has called back.
async.eachSeries(validDatesArr,
  function(day, callback) {
    // getMonth() is zero-based, hence the parenthesised +1.
    loadPage(baseURL+(day.getMonth()+1)+'&day='+day.getDate()+'&year='+day.getFullYear(),function(data){
      console.log('Page loaded...');
      loopThroughData(data,function(){
        callback();
      });
    });
  }, function(err) {
    // Final callback: logs only when no error was reported.
    if(!err) {
      console.log('We processed each date request one by one');
    }
  }
);
另外还需要引入 async 模块,可以在这里找到:https://github.com/caolan/async
npm install async
答案 1 :(得分:0)
您可以通过嵌套异步函数来控制执行流程,就像顺序编程那样,但要小心"回调金字塔"(Pyramid of doom)。另一个解决方案是使用所用异步函数的 Sync(同步)版本(如果存在的话)。如果不需要,您并不是非得编写异步函数;Node.js 之所以大量使用异步函数,是因为非阻塞的特性使它非常适合 Web 开发。所以,在不需要的情况下,不要在自己的函数中使用 async 风格和回调!