global.gc()在每次循环后都没有减少内存?

时间:2017-06-19 06:50:26

标签: node.js web-crawler cheerio

我有一个api to crawler,我尝试使用global()。gc来减少每个循环后的堆内存,但它不起作用。请指出我做错了什么? 它还会导致堆内存不足问题

app.get('/test2', (req, res) => {

res.json({mes:'正在获取数据'});

array = [...]; //数组包含大约1000个元素作为链接

function something(){

let d = q.defer();

let urls = [];
    array.forEach(function (mang, index) {
        let tagArray = [];
        tagArray = null;
        tagArray = [];

        //use this function to reduce the memory heap after looping each element of array 
        global.gc();

        for (let i = 1; i <= 4000; i++) {
            urls.push(function (callback) {
                setTimeout(function () {
                    let link = 'http://something' + mang.link + '/tag-' + i;
                    //we will have about 4000 links due to i
                    let x = link;

                    let options = {
                        url: link,
                        headers: {
                            'User-Agent': 'MY IPHONE 7s'
                        }
                    };

                    function callback1(error, response, html) {
                        if (!error) {
                            let $ = whacko.load(html);
                            let tag_name = $('h1').text();
                            tag_name = tag_name.trim();
                            console.log(tag_name);
                            let tag_content = $('#content').find('div').contents();
                            tag_content = tag_content.toString();
                            if (tag_name !== "" && tag_content !== "") {

                                let tagObject = new Object();

                                tagObject.tag_name = tag_name;
                                tagObject.tag_content = tag_content;
                                tagObject.tag_number = i;

                                tagArray.push(tagObject);

                                if (tagArray.length == 4000) {
                                    tagArray.sort(function (a, b) {
                                        return parseInt(a.tag_number) - parseInt(b.tag_number);
                                    });

                                    for (let v = 0; v < tagArray.length; v++) {
                                        db.query("INSERT INTO `tags` (tag_name, content, tag_number) " +
                                            "SELECT * FROM (SELECT " + "'" + tagArray[v].tag_name + "'" + "," + "'" + tagArray[v].tag_content + "','" + tagArray[v].tag_number + "' as ChapName) AS tmp " +
                                            "WHERE NOT EXISTS (SELECT `tag_name` FROM `tags` WHERE `tag_name`=" + "'" + tagArray[v].tag_name + "'" + ") " +
                                            "LIMIT 1", (err) => {
                                            if (err) {
                                                console.log(err);
                                            }
                                        });
                                    }
                                    urls = null;
                                }
                            }
                        }
                    }

                    request(options, callback1);
                    callback(null, x);
                }, 12000);
            });
        }
    });

    d.resolve(urls);
    return d.promise;

}

something()
.then(function (data) {
    let tasks = data;
    console.log("start data");
    async.parallelLimit(tasks, 40, () => {
        console.log("DONE ");
    });
})

0 个答案:

没有答案