网络刮刀不使用分页

时间:2017-09-19 18:58:22

标签: javascript node.js

我正在尝试构建node.js webscraper以了解有关节点的更多信息。 我已经遇到了分页的墙。我已经使用过' async'建议我处理多个页面请求的包。

当我运行代码时,即使测试用例返回true,它也不会迭代多个请求:

var async = require('async');
var request = require('request');
var cheerio = require('cheerio');

var page = 0;
var options = {
    url: 'http://www.metacritic.com/browse/movies/title/dvd?page=' + page,
    headers: {
        'User-Agent': 'Mozilla Firefox'
    }
};

var scores = [];
var titles = [];
var metaData = [];
var scoresTitles = {};
var pageExists = true;

async.whilst(
    function(){
        return page < 2;
    },
    function(next){
        request(options, function(err, res, html) {
            if(err) { console.log(err) }

            var $ = cheerio.load(html);
            console.log('status code:' + res.statusCode);
            console.log(res.headers);


            $('td.score_wrapper div.metascore_w').each(function(i, node) {
                scores.push($(this).text());
            })

            $('td.title_wrapper div.title a').each(function(i, node) {
                titles.push($(this).text());
            })

            for (i = 0; i < scores.length; i++) {
                scoresTitles = {
                    score: scores[i],
                    title: titles[i]
                }
                metaData.push(scoresTitles);
            }

            console.log(metaData);
            if ($('div[class=pad_top1]').text().trim() == 'No movies found.') {
                pageExists = false;
            }
             page++;
        })

       console.log(page);  

    }
);

非常感谢任何帮助。

1 个答案:

答案 0 :(得分:2)

我刚刚在笔记本电脑上执行了你的代码,它看起来效果很好:

0
status code:200
{ 'content-type': 'text/html; charset=UTF-8',
  'transfer-encoding': 'chunked',
  connection: 'close',
  age: '0',
  'access-control-allow-headers': 'Origin, Authorization, X-Requested-With',
  'access-control-allow-methods': 'POST, GET, OPTIONS',
  'set-cookie':
   [ 'ctk=NTljMWQ4YTM2N2YzMGMxYWRjMWQwZmQ5ZjUyNQ%3D%3D; expires=Mon, 19-Mar-2018 02:55:31 GMT; Max-Age=15552000; path=/; domain=.metacritic.com',
     'il_geo = %7B%22country%5Fcode%22%3A%22AU%22%2C+%22country%5Fname%22%3A%22Australia%22%2C+%22dma%5Fcode%22%3A%22ZZ%22%2C+%22postal%5Fcode%22%3A%223122%22%7D; path=/; domain=www.metacritic.com; expires=Wed, 27-Sep-17 02:55:31 GMT' ],
  date: 'Wed, 20 Sep 2017 02:55:46 GMT',
  'x-varnish': '561413567',
  'x-instart-request-id': '13619236965216160776:FLQ01-NPPRY16:1505876146:0' }
[ { score: '42', title: '#Horror' },
  { score: '68', title: '$9.99' },
  { score: '34', title: '$pent' },
  { score: '83', title: '\'71' },
  { score: '55', title: '\'R Xmas' },
  { score: '76', title: '(500) Days of Summer' },
  { score: '60', title: '+1' },
  { score: '58', title: '...And They Lived Happily Ever After' },
  { score: '65', title: '...So Goes the Nation' },
  { score: '57', title: '1,000 Times Good Night' },
  { score: '37', title: '10 Cent Pistol' },
  { score: '76', title: '10 Cloverfield Lane' },
  { score: '54', title: '10 Items or Less' },
  { score: '70', title: '10 Things I Hate About You' },
  { score: '61', title: '10 Years' },
  { score: '34', title: '10,000 BC' },
  { score: '75', title: '10,000 km' },
  { score: '63', title: '100 Bloody Acres' },
  { score: '44', title: '100 Streets' },
  { score: '49', title: '101 Dalmatians' },
  { score: '35', title: '102 Dalmatians' },
  { score: '36', title: '10th & Wolf' },
  { score: '71', title: '11 Flowers' },
  { score: '65', title: '11:14' },
  { score: '96', title: '12 Angry Men' },
  { score: '38', title: '12 Rounds' },
  { score: '96', title: '12 Years a Slave' },
  { score: '82', title: '127 Hours' },
  { score: '84', title: '13 Assassins' },
  { score: '41', title: '13 Cameras' },
  { score: '57', title: '13 Going on 30' },
  { score: '48',
    title: '13 Hours: The Secret Soldiers of Benghazi' },
  { score: '61', title: '13 Tzameti' },
  { score: '50', title: '14 Blades' },
  { score: '64', title: '1408' },
  { score: '34', title: '15 Minutes' },
  { score: '47', title: '15: The Movie' },
  { score: '67', title: '16 Acres' },
  { score: '63', title: '16 Blocks' },
  { score: '57', title: '16 Years of Alcohol' },
  { score: '48', title: '17 Again' },
  { score: '37', title: '1911' },
  { score: '73', title: '1971' },
  { score: '29', title: '1st Night' },
  { score: '61', title: '2 Days in New York' },
  { score: '67', title: '2 Days in Paris' },
  { score: '38', title: '2 Fast 2 Furious' },
  { score: '55', title: '2 Guns' },
  { score: '58', title: '20 Centimeters' },
  { score: '83', title: '20 Feet from Stardom' },
  { score: '33', title: '200 Cigarettes' },
  { score: '86', title: '2001: A Space Odyssey' },
  { score: '40', title: '2009: Lost Memories' },
  { score: '49', title: '2012' },
  { score: '78', title: '2046' },
  { score: '83', title: '20th Century Women' },
  { score: '48', title: '21' },
  { score: '34', title: '21 and Over' },
  { score: '70', title: '21 Grams' },
  { score: '69', title: '21 Jump Street' },
  { score: '51', title: '21 Years: Richard Linklater' },
  { score: '71', title: '22 Jump Street' },
  { score: '45', title: '23 Blast' },
  { score: '59', title: '24 Days' },
  { score: '85', title: '24 Hour Party People' },
  { score: '47', title: '24 Hours on Craigslist' },
  { score: '67', title: '25th Hour' },
  { score: '47', title: '27 Dresses' },
  { score: '46', title: '28 Days' },
  { score: '73', title: '28 Days Later...' },
  { score: '50', title: '28 Hotel Rooms' },
  { score: '78', title: '28 Weeks Later' },
  { score: '55', title: '3' },
  { score: '46', title: '3 Dancing Slaves' },
  { score: '40', title: '3 Days to Kill' },
  { score: '9', title: '3 Geezers!' },
  { score: '47', title: '3 Generations' },
  { score: '56', title: '3 Hearts' },
  { score: '67', title: '3 Idiots' },
  { score: '48', title: '3 Needles' },
  { score: '11', title: '3 Strikes' },
  { score: '71', title: '3 Women' },
  { score: '51', title: '3, 2, 1... Frankie Go Boom' },
  { score: '72', title: '3-Iron' },
  { score: '53', title: '30 Days of Night' },
  { score: '49', title: '30 Minutes or Less' },
  { score: '56', title: '30 Years to Life' },
  { score: '52', title: '300' },
  { score: '21', title: '3000 Miles to Graceland' },
  { score: '48', title: '300: Rise of an Empire' },
  { score: '35', title: '31' } ]

您使用的是哪个版本的节点?如果您正在使用lts,则尚不支持异步。在这种情况下,请考虑切换到最新版本(现在为8.5.0)。