尝试从此网页抓取酒店名称: http://www.booking.com/searchresults.ru.html?dcid=1&flash_deals=1
这是我的代码:
// Fetch a Booking.com search-results page, collect the text of every element
// matched by the hotel-name selector, and print the collected list.
var request = require('request');
var cheerio = require('cheerio');
var fs = require('fs'); // not used by the code below
var urls = [];

request('http://www.booking.com/searchresults.ru.html?dcid=1&flash_deals=1', function (err, res, body) {
  // Report failures instead of silently printing nothing.
  if (err) {
    console.error('Request failed:', err);
    return;
  }
  if (res.statusCode !== 200) {
    console.error('Unexpected HTTP status code:', res.statusCode);
    return;
  }

  var $ = cheerio.load(body);

  // NOTE(review): the selector is kept exactly as written; it matches <base>
  // elements nested inside `a.hotelname` within `#dealsfound_inner` — confirm
  // this against the actual page markup if the result is still empty.
  $('a.hotelname base', '#dealsfound_inner').each(function (index, element) {
    // `element` is a plain DOM node, which has no .text() method; wrap it
    // with $() to get the Cheerio API.
    urls.push($(element).text());
  });

  console.log(urls);
});
控制台返回空数组。我做错了什么?
答案 0 :(得分:0)
该网址已被永久重定向(301 Moved Permanently),并且重定向目标(Location)中不再包含查询参数。请检查HTTP响应头,例如使用cURL:
> curl -i "http://www.booking.com/searchresults.ru.html?dcid=1&flash_deals=1"
HTTP/1.1 301 Moved Permanently
Server: nginx
Date: Mon, 03 Feb 2014 18:53:38 GMT
Transfer-Encoding: chunked
Connection: keep-alive
Location: http://www.booking.com/searchresults.ru.html
X-Recruiting: Like HTTP headers? Come write ours: booking.com/jobs
也许该网站有某种保护机制,用来阻止这类自动抓取行为。