我正在尝试构建node.js webscraper以了解有关节点的更多信息。 我已经遇到了分页的墙。我已经使用过' async'建议我处理多个页面请求的包。
当我运行代码时,即使测试用例返回true,它也不会迭代多个请求:
var async = require('async');
var request = require('request');
var cheerio = require('cheerio');
var page = 0;
var options = {
url: 'http://www.metacritic.com/browse/movies/title/dvd?page=' + page,
headers: {
'User-Agent': 'Mozilla Firefox'
}
};
var scores = [];
var titles = [];
var metaData = [];
var scoresTitles = {};
var pageExists = true;
async.whilst(
function(){
return page < 2;
},
function(next){
request(options, function(err, res, html) {
if(err) { console.log(err) }
var $ = cheerio.load(html);
console.log('status code:' + res.statusCode);
console.log(res.headers);
$('td.score_wrapper div.metascore_w').each(function(i, node) {
scores.push($(this).text());
})
$('td.title_wrapper div.title a').each(function(i, node) {
titles.push($(this).text());
})
for (i = 0; i < scores.length; i++) {
scoresTitles = {
score: scores[i],
title: titles[i]
}
metaData.push(scoresTitles);
}
console.log(metaData);
if ($('div[class=pad_top1]').text().trim() == 'No movies found.') {
pageExists = false;
}
page++;
})
console.log(page);
}
);
非常感谢任何帮助。
答案 0 :(得分:2)
我刚刚在笔记本电脑上执行了你的代码,它看起来效果很好:
0
status code:200
{ 'content-type': 'text/html; charset=UTF-8',
'transfer-encoding': 'chunked',
connection: 'close',
age: '0',
'access-control-allow-headers': 'Origin, Authorization, X-Requested-With',
'access-control-allow-methods': 'POST, GET, OPTIONS',
'set-cookie':
[ 'ctk=NTljMWQ4YTM2N2YzMGMxYWRjMWQwZmQ5ZjUyNQ%3D%3D; expires=Mon, 19-Mar-2018 02:55:31 GMT; Max-Age=15552000; path=/; domain=.metacritic.com',
'il_geo = %7B%22country%5Fcode%22%3A%22AU%22%2C+%22country%5Fname%22%3A%22Australia%22%2C+%22dma%5Fcode%22%3A%22ZZ%22%2C+%22postal%5Fcode%22%3A%223122%22%7D; path=/; domain=www.metacritic.com; expires=Wed, 27-Sep-17 02:55:31 GMT' ],
date: 'Wed, 20 Sep 2017 02:55:46 GMT',
'x-varnish': '561413567',
'x-instart-request-id': '13619236965216160776:FLQ01-NPPRY16:1505876146:0' }
[ { score: '42', title: '#Horror' },
{ score: '68', title: '$9.99' },
{ score: '34', title: '$pent' },
{ score: '83', title: '\'71' },
{ score: '55', title: '\'R Xmas' },
{ score: '76', title: '(500) Days of Summer' },
{ score: '60', title: '+1' },
{ score: '58', title: '...And They Lived Happily Ever After' },
{ score: '65', title: '...So Goes the Nation' },
{ score: '57', title: '1,000 Times Good Night' },
{ score: '37', title: '10 Cent Pistol' },
{ score: '76', title: '10 Cloverfield Lane' },
{ score: '54', title: '10 Items or Less' },
{ score: '70', title: '10 Things I Hate About You' },
{ score: '61', title: '10 Years' },
{ score: '34', title: '10,000 BC' },
{ score: '75', title: '10,000 km' },
{ score: '63', title: '100 Bloody Acres' },
{ score: '44', title: '100 Streets' },
{ score: '49', title: '101 Dalmatians' },
{ score: '35', title: '102 Dalmatians' },
{ score: '36', title: '10th & Wolf' },
{ score: '71', title: '11 Flowers' },
{ score: '65', title: '11:14' },
{ score: '96', title: '12 Angry Men' },
{ score: '38', title: '12 Rounds' },
{ score: '96', title: '12 Years a Slave' },
{ score: '82', title: '127 Hours' },
{ score: '84', title: '13 Assassins' },
{ score: '41', title: '13 Cameras' },
{ score: '57', title: '13 Going on 30' },
{ score: '48',
title: '13 Hours: The Secret Soldiers of Benghazi' },
{ score: '61', title: '13 Tzameti' },
{ score: '50', title: '14 Blades' },
{ score: '64', title: '1408' },
{ score: '34', title: '15 Minutes' },
{ score: '47', title: '15: The Movie' },
{ score: '67', title: '16 Acres' },
{ score: '63', title: '16 Blocks' },
{ score: '57', title: '16 Years of Alcohol' },
{ score: '48', title: '17 Again' },
{ score: '37', title: '1911' },
{ score: '73', title: '1971' },
{ score: '29', title: '1st Night' },
{ score: '61', title: '2 Days in New York' },
{ score: '67', title: '2 Days in Paris' },
{ score: '38', title: '2 Fast 2 Furious' },
{ score: '55', title: '2 Guns' },
{ score: '58', title: '20 Centimeters' },
{ score: '83', title: '20 Feet from Stardom' },
{ score: '33', title: '200 Cigarettes' },
{ score: '86', title: '2001: A Space Odyssey' },
{ score: '40', title: '2009: Lost Memories' },
{ score: '49', title: '2012' },
{ score: '78', title: '2046' },
{ score: '83', title: '20th Century Women' },
{ score: '48', title: '21' },
{ score: '34', title: '21 and Over' },
{ score: '70', title: '21 Grams' },
{ score: '69', title: '21 Jump Street' },
{ score: '51', title: '21 Years: Richard Linklater' },
{ score: '71', title: '22 Jump Street' },
{ score: '45', title: '23 Blast' },
{ score: '59', title: '24 Days' },
{ score: '85', title: '24 Hour Party People' },
{ score: '47', title: '24 Hours on Craigslist' },
{ score: '67', title: '25th Hour' },
{ score: '47', title: '27 Dresses' },
{ score: '46', title: '28 Days' },
{ score: '73', title: '28 Days Later...' },
{ score: '50', title: '28 Hotel Rooms' },
{ score: '78', title: '28 Weeks Later' },
{ score: '55', title: '3' },
{ score: '46', title: '3 Dancing Slaves' },
{ score: '40', title: '3 Days to Kill' },
{ score: '9', title: '3 Geezers!' },
{ score: '47', title: '3 Generations' },
{ score: '56', title: '3 Hearts' },
{ score: '67', title: '3 Idiots' },
{ score: '48', title: '3 Needles' },
{ score: '11', title: '3 Strikes' },
{ score: '71', title: '3 Women' },
{ score: '51', title: '3, 2, 1... Frankie Go Boom' },
{ score: '72', title: '3-Iron' },
{ score: '53', title: '30 Days of Night' },
{ score: '49', title: '30 Minutes or Less' },
{ score: '56', title: '30 Years to Life' },
{ score: '52', title: '300' },
{ score: '21', title: '3000 Miles to Graceland' },
{ score: '48', title: '300: Rise of an Empire' },
{ score: '35', title: '31' } ]
您使用的是哪个版本的节点?如果您正在使用lts,则尚不支持异步。在这种情况下,请考虑切换到最新版本(现在为8.5.0)。