我是节点js的初学者,我试图编写一个网络抓取脚本。如果我每分钟发出少于15个请求,我获得了网站管理员的许可来刮取他们的产品。当我开始时,它曾经一次请求所有的URL,但经过一些工具,我能够遍历数组中的每个项目,但是当数组中没有更多的项目时,脚本不会停止?我对结果并不满意,觉得有更好的方法可以做到这一点。
var express = require('express');
var fs = require('fs');
var request = require('request');
var cheerio = require('cheerio');
var app = express();
var async = require('async');
app.get('/scrape', function(req, res){
productListing = ['ohio-precious-metals-1-ounce-silver-bar','morgan-1-ounce-silver-bar']
var i = 0;
async.eachLimit(productListing, 1, function (product, callback) {
var getProducts = function () {
var url = 'http://cbmint.com/' + productListing[i];
request(url, function(error, response, html) {
if(!error){
var $ = cheerio.load(html);
var title;
var json = { title : ""};
$('.product-name').filter(function(){
var data = $(this);
title = data.children().children().first().text();
json.title = title;
})
}
var theTime = new Date().getTime();
console.log(i);
console.log(json.title);
console.log(theTime);
i++;
});
}
setInterval(getProducts,10000);
})
res.send('Check your console!')
})
app.listen('8081')
console.log('Magic happens on port 8081');
exports = module.exports = app;