在node.js中强制代码按顺序执行

时间:2014-12-31 18:32:00

标签: javascript sql node.js asynchronous

我终于弄清楚回调如何在node.js中运行,但我现在正在尝试让我的代码按顺序执行。

目标是(按顺序):

  1. 将网址加载到cheerio
  2. 解析页面上<tbody>中的每个<td>
  3. 将文本元素加载到数据数组中后,回调。
  4. 在完整的数据阵列上调用loopThroughData。
  5. 遍历数据数组,并对其中的每个元素调用lookForPlayer,该函数会:
  6. 在我的数据库中运行一个SELECT查询,查找与传入的文本元素(球员名称)相匹配的记录;如果数据库中没有匹配项,就插入该球员(为了便于测试,我暂时只是把它打印到控制台)。
  7. 最终目标是遍历每个页面(每个日期都有一个单独的URL,因此我按日期循环),并把不在我数据库中的球员插入进去,且每个球员只插入一次。问题是它在执行任何INSERT查询之前就先执行完了所有的SELECT,因此同一个球员会被插入多次。

    这是我正在解析的页面,如果它有帮助:http://www.basketball-reference.com/friv/dailyleaders.cgi?month=12&day=29&year=2014

    这是我的代码:

    /**
     * Requests `url`, pushes the text of every <td> found within a <tbody>
     * onto the outer `data` array, and calls `callback(data)` only if
     * `rowsRemaining` equals 0 afterwards.
     *
     * `request`, `cheerio`, `data` and `rowsRemaining` are not declared in
     * this function; they are taken from the enclosing scope.
     *
     * @param {string} url - Address of the page to fetch.
     * @param {function(Array)} callback - Receives the shared `data` array.
     */
    function loadPage (url, callback){
        request(url, function(err, response, body){
            if(!err && response.statusCode ==200){
                var $ = cheerio.load(body);
                // NOTE(review): `$` is the value returned by cheerio.load and is
                // called as a function below, so `$.length` is presumably that
                // function's declared parameter count, not the number of cells
                // on the page - confirm.
                rowsRemaining = $.length;
                $('td', 'tbody').each(function(){
                    var text = $(this).text();
                    data.push(text);
                    // Decremented once per cell; with more cells than the
                    // starting value this goes negative by the end of the loop.
                    rowsRemaining -= 1;
                    console.log('rows left: ',rowsRemaining);
                });
            }
            // `$` is declared with `var` inside the branch above, so it is
            // hoisted to this callback's scope and is undefined here when that
            // branch did not run.
            // The callback fires only when the counter is exactly 0; there is
            // no path that reports a failed request to the caller.
            if (rowsRemaining == 0){
                console.log('$ length: ',$.length);
                callback(data);
            }
        });
    }
    
    /**
     * Steps through `data` starting at index 1 in strides of 26 and calls
     * lookForPlayer for each visited entry, then calls `callback()`.
     *
     * NOTE(review): the stride presumably reflects 26 cells per table row with
     * the player name in the second cell - confirm against the scraped page.
     *
     * @param {Array<string>} data - Flat list of cell texts.
     * @param {function()} callback - Called with no arguments after the loop.
     */
    function loopThroughData (data, callback){
        // `i` has no var/let declaration, so it is a shared (implicit global)
        // counter rather than a per-iteration variable.
        for(i=1;i<data.length;i+=26){
            // replace() with a string pattern removes only the first apostrophe.
            lookForPlayer(data[i].replace("'",""),function(name){
                /* var insertPlayer = connection.query(
                    'INSERT INTO player (provider_id, team_id, position_id, name) VALUES (1, (SELECT id FROM team WHERE slug = "'+data[i+1]+'"),1,"'+name+'");',function(err,result,fields){
                }); */
                // If the lookup completes after the loop has finished, `i` read
                // here is the shared counter's final value, not the index that
                // started this lookup.
                console.log('i is currently = ',i);
    
            });
        }
        // Runs as soon as the loop has issued its lookups; it does not wait for
        // any lookForPlayer callback.
        callback();
    }
    
    /**
     * Queries the `player` table for a row whose name equals `name` and calls
     * `callback(name)` only when the query returns no rows.
     *
     * When a row is found nothing is called, so the caller gets no signal that
     * the lookup finished. A query error is thrown from inside the query
     * callback.
     *
     * @param {string} name - Player name to look up.
     * @param {function(string)} callback - Called with `name` when no row matched.
     */
    function lookForPlayer(name, callback){
        console.log('Looking for Player...');
        // NOTE(review): the SQL text is built by concatenating `name` directly
        // into the statement; the return value stored in `selectPlayer` is not
        // used in this function.
        var selectPlayer = connection.query(
        "SELECT * FROM player WHERE name = '"+name+"'", function(err, rows, fields){
        if(err) throw err;
        if(rows.length==0){
            callback(name);
        }
        });
    }
    
    //loop through every day since the season started
    // `d` is assigned from `seasonStart` without a copy or a declaration, so the
    // loop advances the same object `seasonStart` refers to.
    // The for loop itself is synchronous: it calls loadPage for every date
    // without waiting for any page's callback to run.
    for (d = seasonStart; d <= Date.now(); d.setDate(d.getDate() + 1)){
        console.log('d = ',d);
        // getMonth() is zero-based, hence the +1 inside the parentheses.
        loadPage(baseURL+(d.getMonth()+1)+'&day='+d.getDate()+'&year='+d.getFullYear(),function(data){
            console.log('Page loaded...');
            loopThroughData(data,function(){
    
            });
        });
    }
    

    正如您所看到的,我尝试添加一个rowsRemaining变量,旨在确保在调用loadPage函数中的回调之前解析整个文件,但它永远不会到达那一点。请注意,我在这些函数(rowsRemaining,data等)之前初始化了很多这些变量。

    在完全加载,解析和插入第一页之前,它似乎也循环遍历每个日期,它不应该这样做。

    以下是基于@Brant的答案更新后的代码:

       /**
        * Requests `url`, pushes the text of every <td> found within a <tbody>
        * onto the outer `data` array, then always calls `callback(data)`.
        *
        * The callback is invoked even when the request failed or the status
        * was not 200; in that case `data` is passed on without new entries.
        *
        * NOTE(review): `data` is not declared or reset in this function, so it
        * appears to keep growing across calls - confirm where it is initialised.
        *
        * @param {string} url - Address of the page to fetch.
        * @param {function(Array)} callback - Receives the shared `data` array.
        */
       function loadPage (url, callback){
        request(url, function(err, response, body){
            if(!err && response.statusCode ==200){
                var $ = cheerio.load(body);
                console.log(url);
                $('td', 'tbody').each(function(){
                    var text = $(this).text();
                    data.push(text);
                });
            }
            callback(data);
        });
    }
    
    /**
     * Steps through `data` starting at index 1 in strides of 26, calls
     * lookForPlayer for each visited entry and issues an INSERT for every name
     * that lookForPlayer reports back, then calls `callback(data)`.
     *
     * @param {Array<string>} data - Flat list of cell texts.
     * @param {function(Array)} callback - Called with `data` after the loop.
     */
    function loopThroughData (data, callback){
        // `i` has no var/let declaration, so it is a shared (implicit global)
        // counter rather than a per-iteration variable.
        for(i=1;i<data.length;i+=26){
            // replace() with a string pattern removes only the first apostrophe.
            lookForPlayer(data[i].replace("'",""),function(name){
                // `data[i+1]` is evaluated when this callback runs; if that is
                // after the loop has finished, `i` holds its final value and the
                // team slug does not belong to `name`.
                // NOTE(review): the SQL text is built by string concatenation
                // and the INSERT callback ignores `err`.
                var insertPlayer = connection.query(
                    'INSERT INTO player (provider_id, team_id, position_id, name) VALUES (1, (SELECT id FROM team WHERE slug = "'+data[i+1]+'"),1,"'+name+'");',function(err,result,fields){
                });
    
            });
        }
        // Runs as soon as the loop has issued its lookups; it does not wait for
        // any SELECT or INSERT to complete.
        callback(data);
    }
    
    /**
     * Queries the `player` table for a row whose name equals `name`; when the
     * query returns no rows it logs the miss and calls `callback(name)`.
     *
     * When a row is found nothing is called. A query error is thrown from
     * inside the query callback.
     *
     * @param {string} name - Player name to look up.
     * @param {function(string)} callback - Called with `name` when no row matched.
     */
    function lookForPlayer(name, callback){
        // NOTE(review): the SQL text is built by concatenating `name` directly
        // into the statement; `selectPlayer` is not used in this function.
        var selectPlayer = connection.query(
        "SELECT * FROM player WHERE name = '"+name+"'", function(err, rows, fields){
        if(err) throw err;
        if(rows.length==0){
            console.log(name,' was not found in DB!');
            callback(name);
        }
        });
    }
    
    //loop through every day since the season started
    // `d` is the same object on every iteration (it is advanced in place by
    // setDate), so each push stores another reference to that one Date rather
    // than a separate date per day.
    for (d = seasonStart; d <= Date.now(); d.setDate(d.getDate() + 1)){
        validDatesArr.push(d);
    }
    
    // Hands each entry of validDatesArr to the iterator below via
    // async.eachSeries; the iterator signals completion by calling `callback`.
    async.eachSeries(validDatesArr,
        // The iterator's first parameter reuses the name `validDatesArr`, so
        // inside this function it refers to a single entry, not the array.
        function(validDatesArr, callback){
        // The expression is evaluated left to right as string concatenation, so
        // `getMonth()+1` appends the character "1" to the zero-based month
        // instead of adding one to it.
        // NOTE(review): this URL inserts '/month=' after baseURL, unlike the
        // earlier version of the loop - confirm which form baseURL expects.
        loadPage(baseURL+'/month='+validDatesArr.getMonth()+1+'&day='+validDatesArr.getDate()+'&year='+validDatesArr.getFullYear(),function(data){
            loopThroughData(data, function(){
                callback();
            });
        });
        }, function(err){
            if(!err){
                console.log('We processed each date requests one by one');
            }
        }
    );
    

    所以现在它会逐个加载页面,但并没有在loopThroughData函数中对这些数据执行INSERT。我想我需要在async的序列中再添加一个函数,但这里的迭代函数调用的是具名函数,而不是使用匿名函数。

2 个答案:

答案 0 :(得分:1)

修改你的for循环如下:

//loop through every day since the season started
var validDatesArr = [];

// Iterate over a copy so `seasonStart` itself is not advanced, and push a
// fresh Date for every day. Pushing `d` directly would fill the array with
// references to one object that is mutated by setDate, so every entry would
// end up holding the same final date.
for (var d = new Date(seasonStart); d <= Date.now(); d.setDate(d.getDate() + 1)){
    validDatesArr.push(new Date(d));
}

// Process the dates strictly one after another: the next date is started only
// after the current one has called `callback`.
async.eachSeries(validDatesArr, 
    function(d, callback) {
        // getMonth() is zero-based, so add 1 numerically (inside parentheses)
        // before it is concatenated into the URL.
        loadPage(baseURL+(d.getMonth()+1)+'&day='+d.getDate()+'&year='+d.getFullYear(),function(data){
            console.log('Page loaded...');
            loopThroughData(data,function(){
                callback();
            });
        });
    }, function(err) {
        // Final callback: runs once after all dates were handled, or as soon as
        // an iterator reports an error.
        if(!err) {
            console.log('We processed each date request one by one')
        }
    }
);

并且需要async,可在此处找到:https://github.com/caolan/async

npm install async

答案 1 :(得分:0)

您可以嵌套异步函数来控制执行流程,就像顺序编程那样,但要小心陷入“厄运金字塔”(Pyramid of Doom,即层层嵌套的回调)。另一种解决方案是使用这些异步函数的同步(Sync)版本(如果存在的话)。如果不需要,您并不是必须编写异步函数;Node.js之所以大量使用异步函数,是因为它是一种非常强大的非阻塞语言,适用于Web开发。所以,在不需要的地方,就不要在函数中使用异步风格和回调!