我正在编写一个Express.js应用程序,并使用x-ray作为抓取工具来获取一些信息。
我想为我抓取的每个网站创建一个模型(不同的网站=不同的数据/不同的抓取流程)。 这是该模块的代码:
/**
 * Scrapes the search page for `title` and builds one string from the titles
 * of the matched links.
 *
 * The function has no return statement; the built string is only handed to
 * `sendScraped`, a name that is not declared or required inside this block and
 * is therefore looked up as a free (global) identifier when the scrape
 * callback runs.
 *
 * @param {string} title - Search text; concatenated onto the query string
 *     without any encoding.
 */
module.exports.getData = function(title){
var Xray = require('x-ray');
var x = Xray();
var url = "http://www.mywebsite.it/online/search?text="+title;
var scraped = '';
// For every element matching the selector, collect the text and href of its 'a' child.
x(url, '.listing-products.listing-rows.clearfix h2.title',
[{
title: 'a',
link: 'a@href'
}]
)(function (err, obj) {
// `err` is never inspected here: obj.length is read unconditionally.
// The loop walks from the last match to the first; each title is followed by one space.
for (var i = obj.length - 1; i >= 0; i--) {
scraped += obj[i]['title'] + " "; //we get just the title of the links for now
};
sendScraped(scraped);
});
}
问题是控制器调用getData之后得不到任何返回值,因为调用之后程序会继续往下执行,而不会等待x()抓取函数完成。
我试图实现一个回调函数sendScraped(scraped),让我的控制器等待抓取完成(completion),但是我不知道如何从模型中调用它。 这是我在控制器中尝试的:
var mywebsite = require('../models/mywebsite')
// Route handler: starts a scrape for the title taken from the route parameters.
// The return value of getData is not used and `res` is never touched in this
// function, so the handler itself sends no response.
exports.searchTitle = function(req, res) {
mywebsite.getData(req.params.title);
};
// Publishes a function on `global` under the name sendScraped so that other
// modules can call it as a bare identifier.
// NOTE(review): `res` is neither a parameter nor a local of this function; in
// the visible code it exists only as a parameter of searchTitle, so this
// reference presumably fails with a ReferenceError when called — confirm.
global.sendScraped = function endScraping(data) {
return res.send(data);
}
答案 0 :(得分:3)
我建议您修改getData()方法,使其接受一个回调函数作为参数。
然后它可以在抓取完成后,把抓取到的数据传给该回调:
/**
 * Scrapes the search-results page for `title` and passes the concatenated
 * link titles to a Node-style callback.
 *
 * @param {string} title - Search text. It is URL-encoded before being placed
 *     in the query string, so callers should pass the raw (decoded) text.
 * @param {function(?Error, string=)} callback - Called with `(err)` when the
 *     scrape fails, otherwise with `(null, scraped)` where `scraped` is every
 *     matched title followed by a single space, last match first.
 */
module.exports.getData = function (title, callback) {
    var Xray = require('x-ray');
    var x = Xray();
    // Encode the search text so characters such as spaces, '&' or '#'
    // cannot truncate or alter the query string.
    var url = "http://www.mywebsite.it/online/search?text=" + encodeURIComponent(title);
    x(url, '.listing-products.listing-rows.clearfix h2.title', [{
        title: 'a',
        link: 'a@href'
    }])(function (err, obj) {
        if (err) return callback(err);
        var scraped = '';
        // Walk the matches from last to first; only the titles are used for now.
        for (var i = obj.length - 1; i >= 0; i--) {
            scraped += obj[i]['title'] + " ";
        }
        callback(null, scraped);
    });
};
然后,您可以在请求处理程序中使用它,如下所示:
var mywebsite = require('../models/mywebsite')
exports.searchTitle = function(req, res) {
mywebsite.getData(req.params.title, function(err, scrapeData) {
if (err) {
// do something with error
} else {
res.send(scrapeData);
}
});
};
而且,这是一个使用promises的版本:
var Xray = require('x-ray');
/**
 * Promise adapter for an x-ray scrape.
 *
 * Creates a fresh x-ray instance, runs `selector`/`args` against `url`, and
 * settles the returned promise from the scrape's Node-style callback.
 *
 * @param {string} url - Page to scrape.
 * @param {string} selector - Scope selector handed to x-ray.
 * @param {*} args - Extraction spec handed to x-ray.
 * @returns {Promise<*>} Resolves with the scraped value, rejects with the scrape error.
 */
function xp(url, selector, args) {
    return new Promise((resolve, reject) => {
        const runScrape = Xray()(url, selector, args);
        runScrape((err, obj) => {
            if (err) {
                reject(err);
                return;
            }
            resolve(obj);
        });
    });
}
/**
 * Scrapes the search-results page for `title` and returns a promise for the
 * concatenated link titles.
 *
 * @param {string} title - Search text. It is URL-encoded before being placed
 *     in the query string, so callers should pass the raw (decoded) text.
 * @returns {Promise<string>} Resolves with every matched title followed by a
 *     single space, last match first; rejects if the scrape fails.
 */
module.exports.getData = function (title) {
    // Encode the search text so characters such as spaces, '&' or '#'
    // cannot truncate or alter the query string.
    var url = "http://www.mywebsite.it/online/search?text=" + encodeURIComponent(title);
    var selector = '.listing-products.listing-rows.clearfix h2.title';
    return xp(url, selector, [{title: 'a', link: 'a@href'}]).then(function (obj) {
        var scraped = "";
        // Walk the matches from last to first; only the titles are used for now.
        for (var i = obj.length - 1; i >= 0; i--) {
            scraped += obj[i]['title'] + " ";
        }
        return scraped;
    });
};
然后,您可以在请求处理程序中使用它,如下所示:
var mywebsite = require('../models/mywebsite');
exports.searchTitle = function(req, res) {
mywebsite.getData(req.params.title).then(function(scrapeData) {
res.send(scrapeData);
}, function(err) {
// handle error here
});
};