我正在尝试使用cheerio从网站上抓取一些数据,但我收集了一些[href] s然后我再次想要请求那些链接并获取一些数据但是我得到所有的链接但是当执行循环时,我无法从这些页面看到html。 我错过了什么?
var express=require('express');
var path = require('path');
var request = require('request');
var cheerio = require('cheerio');
var fs = require('fs');
var app = express();
var port =8080;
var cheerioTableparser = require('cheerio-tableparser');
var Links = [];
var uniqueLinks = [];
var url = "http://getThoseLinks.com";
var count = 2;
app.get('/',function (req,res) {
request(url, function (error, response, html) {
if (!error && response.statusCode == 200) {
for (var i = 1; i < count; i++) {
var url2 = "http://getThoseLinks.com?pager=true&paged="+i;
request(url2,function (err,res,body) {
var $ = cheerio.load(html);
cheerioTableparser($);
var data = $(".Q-grid").parsetable(false, false, false);
console.log(data.length);
for (var i = 0; i < data[0].length; i=i+4) {
var regex = /<a href="(.*?)"/g;
while (match = regex.exec(data[0][i])) {
console.log(match[1]);// **this gets printed**
request(url+match[1],function (errLink,resLink,htmlData) {
var $ = cheerio.load(htmlData);
console.log($); // **This does not**
})
}
}
});
};
uniqueLinks=Links.filter(function(item, i, ar){ return ar.indexOf(item) === i; });
for (var i = 0; i < uniqueLinks.length; i++) {
data= "http://getThoseLinks.com" + uniqueLinks[i];
console.log(data);
}
}
else
{
console.log(error);
}
});
});
app.listen(port);
exports= module.exports= app;
console.log('server running on '+port);