我是一个完全的初学者,正在学习 JavaScript 和 Node.js,并在构建一个简单的网页抓取工具(scraper)。这段代码会抓取多个域名。我想把每个被抓取域名的信息(例如标题、描述等 meta 标签)存储到一个哈希表中,但我不知道该如何继续。你能简单解释一下该怎么做吗?代码如下:
var request = require('request');
var cheerio = require('cheerio');
var URL = require('url-parse');
// Visits each URL in `arr`, one every 5 seconds, and logs the HTTP status
// code and the page <title> of every successful (200) response.
const arr = ["http://allrecipes.com/", "http://www.gossip.fr/"];
console.log("Visiting pages now... ");
for (let i = 0; i < arr.length; i++) {
  // Stagger the requests: the i-th URL is fetched after 5 * i seconds.
  // setTimeout forwards its extra arguments (the URL and the callback) to `request`.
  setTimeout(request, 5000 * i, arr[i], function (error, response, body) {
    if (error) {
      console.log("Error: " + error);
      // A failed request may not provide a `response`, so stop here instead of
      // dereferencing it below.
      return;
    }
    console.log("Status code: " + response.statusCode);
    if (response.statusCode === 200) {
      // Parse the returned HTML so it can be queried with CSS selectors.
      const $ = cheerio.load(body);
      console.log("Page title: " + $('title').text());
    }
  });
}
我按如下方式修改了代码,但它并没有把每个域名的标题分别存入哈希表,而是把所有结果都存到了数组中最后一个域名对应的键下。见下面的代码:
var request = require('request');
var cheerio = require('cheerio');
var URL = require('url-parse');
// Object intended to map each visited URL to the data scraped from it.
var output = {};
var arr = ["http://allrecipes.com/", "http://www.gossip.fr/", "http://www.clicrbs.com.br/rs/" ];
console.log("Visiting pages now... ");
for (var i = 0; i < arr.length; i++) {
// `var` is function-scoped, not block-scoped: every iteration reassigns the
// same `result` variable. The loop finishes synchronously before any request
// callback runs, so every callback below sees the last URL in `arr`.
var result = arr[i];
// Stagger the requests by 5 * i seconds; setTimeout forwards the URL and the
// callback as arguments to `request`.
setTimeout(request, 5000 * i, arr[i], function (error, response, body) {
if(error) {
console.log("Error: " + error);
}
// NOTE(review): execution continues after an error is logged; if `response`
// is undefined in that case, the property access below throws — confirm and guard.
console.log("Status code: " + response.statusCode);
if(response.statusCode === 200) {
var $ = cheerio.load(body);
console.log("Page title: " + $('title').text());
}
// Bare block statement; it runs unconditionally. `$` is only assigned in the
// 200 branch above (its `var` declaration is hoisted), so for any other
// status `$` is undefined here and calling it throws.
{
output[result] = {error: error, title: $('title').text(), status: response.statusCode}
}
});
}
答案 0 :(得分:0)
使用 `let` 代替 `var`。这样每次迭代都会创建一个新的 `let` 变量,而不是覆盖同一个 `var` 变量。
正确:
// Collects key -> index pairs; written to the document once all timers have fired.
const output = {};
const a = ['a', 'b', 'c'];

for (let i = 0; i < a.length; i += 1) {
  // `let` creates a fresh `result` binding for every iteration, so each
  // delayed callback below sees the key from its own iteration.
  let result = a[i];
  setTimeout(() => {
    output[result] = i;
  }, i * 5);
}

// Scheduled later than the 0/5/10 ms timers above, then writes the collected object.
setTimeout(() => {
  document.write(JSON.stringify(output));
}, 20);
不正确:
// Demonstrates the pitfall: `result` is declared with `var` inside the loop.
let output = {}
let a = ['a', 'b', 'c']
for(let i=0; i<a.length; i++) {
// `var` is not block-scoped, so all iterations share a single `result`.
// The loop completes before any timer fires, so every callback writes to
// the key of the last element ('c') and earlier keys are never set.
var result = a[i]; setTimeout(x => output[result] = i, i*5)
}
// Scheduled later than the 0/5/10 ms timers above; writes the collected object.
setTimeout(() => document.write(JSON.stringify(output)), 20)
您可以在 Mozilla 开发者网络(MDN)上详细了解 `let` 和词法作用域(lexical scoping)。