我有一个从网站上抓取信息的 Node.js 应用程序。我使用了 npm 包 request 和 cheerio,抓取本身工作正常,但我想在 request 的请求完成之后再执行其他操作。下面是部分代码:
app.js
var express = require('express');
var extractor = require("./extractor");
// Logs whatever the extractor returns synchronously; any work the extractor
// performs asynchronously has not finished at this point.
console.log(extractor('http://www.example.com'));

var app = express();

// Answer GET / with a fixed greeting.
function greet(req, res) {
    res.send('Hello world\n');
}

app.get('/', greet);
app.listen(3000);
extractor.js(核心逻辑都在这里)
var request = require('request');
var cheerio = require('cheerio');
/**
 * Scrapes the game rows (`tr.game`) from the page at `url`.
 *
 * The HTTP request is asynchronous: `extractedGames` is assigned immediately
 * and refers to an array that is normally still empty when the constructor
 * returns. The request callback pushes into that same array later.
 *
 * @constructor
 * @param {string} url - Address of the page to scrape; passed straight to `request`.
 */
var Extractor = function(url) {
    var games = [];
    request(url, function (error, response, html) {
        // Report failures instead of silently leaving the list empty.
        if (error) {
            console.error('Request to ' + url + ' failed:', error);
            return;
        }
        if (response.statusCode !== 200) {
            console.error('Request to ' + url + ' returned status ' + response.statusCode);
            return;
        }
        var $ = cheerio.load(html);
        $('tr.game').each(function(i, v){
            var game = { /* many attributes */ };
            games.push(game);
        });
    });
    // Same array instance the callback fills, so it is populated once the
    // response has been parsed -- not before.
    this.extractedGames = games;
};
module.exports = function(url) {
return new Extractor(url);
};
最后,当我运行它时,输出是 { extractedGames: [] },这是因为输出在请求处理完成之前就已经打印了。
所以我想给 extractedGames 属性添加一个“成功”(on success)事件,在请求完成时触发。
谢谢
答案 0 :(得分:0)
我自己解决了!希望这能帮到以后遇到同样问题的人(尽管我觉得自己像个十足的菜鸟)。
诀窍是在请求完成时发出一个事件,然后在事件处理函数中处理结果。下面是修改后的 app.js:
var express = require('express');
var extractor = require("./extractor");
// Start the extraction; the return value is intentionally not used here.
extractor('http://www.example.com');

var app = express();

// Handler for GET /: always replies with the same greeting.
var sendGreeting = function (req, res) {
    res.send('Hello world\n');
};

app.get('/', sendGreeting);
app.listen(3000);
在 app.js 中,我删除了 console.log(extractor('http://www.example.com')),因为它会在请求完成之前执行。
所以我把输出移到了 extractor.js 的事件处理函数中:
var request = require('request');
var cheerio = require('cheerio');
var Emitter = require('events').EventEmitter;
// Module-wide emitter on which 'extracted' events are delivered.
var extractEmitter = new Emitter();

// Print the list of games carried by each 'extracted' event.
function printExtractedGames(extractedGames) {
    console.log(extractedGames);
}

extractEmitter.on('extracted', printExtractedGames);
/**
 * Scrapes the game rows (`tr.game`) from the page at `url` and announces the
 * result by emitting an 'extracted' event on `extractEmitter`.
 *
 * The HTTP request is asynchronous: `extractedGames` is assigned immediately
 * and refers to an array that the request callback fills later. Consumers
 * that need the finished list should react to the 'extracted' event.
 *
 * @constructor
 * @param {string} url - Address of the page to scrape; passed straight to `request`.
 */
var Extractor = function(url) {
    var games = [];
    request(url, function (error, response, html) {
        // Report failures instead of silently emitting nothing.
        if (error) {
            console.error('Request to ' + url + ' failed:', error);
            return;
        }
        if (response.statusCode !== 200) {
            console.error('Request to ' + url + ' returned status ' + response.statusCode);
            return;
        }
        var $ = cheerio.load(html);
        $('tr.game').each(function(i, v){
            var game = { /* many attributes */ };
            games.push(game);
        });
        // Emitted only after every row has been collected.
        extractEmitter.emit('extracted', games);
    });
    // Same array instance that is passed to the 'extracted' listeners.
    this.extractedGames = games;
};
module.exports = function(url) {
return new Extractor(url);
};