嵌套 Node.js 请求中的参数处理

时间:2014-08-30 01:01:13

标签: javascript node.js

我需要从一些父页面中取得子页面的锚点链接。我想抓取所有父页面,解析它们,取得其中的子锚点,再请求这些锚点指向的页面并获得结果。但是在编写代码时我发现,在子请求的回调里,锚点网址并没有随着循环而改变。这是我的代码:

var req = require('request');
var cheerio = require('cheerio')
var model = require('./model')


/**
 * Fetches one page of the episode list identified by `index`, requests every
 * anchor found on it, then calls itself again with `index + 20`.
 * Stops when a request fails, returns a non-200 status, or the body contains
 * the text "暂无内容".
 *
 * NOTE: this version exhibits the closure-in-loop behaviour described in the
 * surrounding question; see the comments at TAG 1 / TAG 2.
 *
 * @param {number} index - value inserted into the `divid=reload_<index>` query parameter.
 */
function callnext(index){
    var url = 'http://www.youku.com/show_episode/id_z2c9b63e691e611e2b356.html?dt=json&divid=reload_'+index+'&__rt=1&__ro=reload_21';
    // The value returned by req.get is stored but never read.
    var result = req.get(url, function(error, response, body){
        if (!error && response.statusCode == 200) {
            var patt = /暂无内容/g;
            // This `result` is a separate variable that shadows the outer one.
            var result = patt.test(body);
            if(result){
                // Body contains the marker text: stop paging.
                return;
            }
            // No var/let/const here, so this assigns to an implicit global `$`.
            $ = cheerio.load(body);
            var children = $('div').first().children();
            for(var i=0;i<children.length;i++){
                var item = $(children[i]);
                // `var` is function-scoped: there is only ONE `anchor` binding for
                // this whole callback, and each iteration overwrites it.
                var anchor = $(item.find('li>a')[0]).attr('href');
                var labelText = $(item.find('label')[0]).text();
                //TAG 1
                // At this point `anchor` still holds the value for the current
                // iteration, so the request below is sent to the right URL.
                req.get(anchor, function(error, response, body){
                    //TAG 2
                    // This callback is invoked later, once the response arrives, and
                    // by then the loop has moved on. It reads the shared `anchor`
                    // binding, so it prints whatever was assigned most recently
                    // rather than the URL this particular request was made with.
                    console.log(anchor);
                    //here's my result
                })
            }
            // Advance to the next page; the step of 20 is hard-coded.
            index = index+20;
            callnext(index)
        }
    })
}
// Start crawling with index 1.
callnext(1);

在这段代码中,如果我分别在 TAG 1 和 TAG 2 的位置用 console.log() 输出锚点网址,会得到不同的结果。在 TAG 1 处是我预期的结果,但在 TAG 2 处,似乎每次打印出来的都是父页面中的同一个锚点。

我尝试修改代码,把子请求提取成一个单独的函数,结果就正确了。为什么?

var req = require('request');
var cheerio = require('cheerio')
var model = require('./model')

/**
 * Requests a single episode page and extracts the text of its `#text_long`
 * element.
 *
 * Because `url` is a parameter, every call gets its own binding, so the
 * callback below always logs the URL that this particular request was made
 * with (unlike a `var` declared inside a loop, which is shared).
 *
 * @param {string} url  - address of the page to fetch.
 * @param {string} text - label text supplied by the caller; currently unused
 *                        here (the persistence step is still a TODO).
 */
function crawlItem(url, text){
    req.get(url, function(error, response, body){
        // Do not log or parse anything when the request failed; `body` is
        // not a usable page in that case.
        if (error || !response || response.statusCode !== 200) {
            console.error('crawlItem: request failed for ' + url, error || (response && response.statusCode));
            return;
        }
        console.log(url);
        var inner = cheerio.load(body);
        // Named `description` so it no longer shadows the `text` parameter.
        var description = inner('#text_long').text();
        // TODO: persist the result, e.g.
        // model.Talk.create({ id: la, video: hr, youku_desc: description }).complete(function(err, album) {
        //  console.log(err);
        // });
    });
}

/**
 * Fetches one page of the episode list identified by `index`, hands every
 * anchor found on it to `crawlItem`, then calls itself with `index + 20`.
 *
 * Paging stops when the request fails, the status is not 200, or the body
 * contains the text "暂无内容".
 *
 * @param {number} index - value inserted into the `divid=reload_<index>` query parameter.
 */
function callnext(index){
    var url = 'http://www.youku.com/show_episode/id_z2c9b63e691e611e2b356.html?dt=json&divid=reload_'+index+'&__rt=1&__ro=reload_21';
    req.get(url, function(error, response, body){
        if (error || response.statusCode !== 200) {
            return;
        }
        // Marker text in the body means there is nothing more to page through.
        if (/暂无内容/.test(body)) {
            return;
        }
        // Declared locally so no implicit global `$` is created.
        var $ = cheerio.load(body);
        var children = $('div').first().children();
        for (var i = 0; i < children.length; i++) {
            var item = $(children[i]);
            var anchor = item.find('li>a').first().attr('href');
            var labelText = item.find('label').first().text();
            // Skip entries that carry no link instead of requesting `undefined`.
            if (!anchor) {
                continue;
            }
            // Passing the values as arguments gives each request its own copy.
            crawlItem(anchor, labelText);
        }
        // Next page; the step of 20 is hard-coded.
        callnext(index + 20);
    });
}

// Start crawling with index 1.
callnext(1);

0 个答案:

没有答案