我正在从N
网址中提取文字。首先,我在linksOnPage
中获取N个网址,然后运行doOnPage
函数从每个网址获取文本。当我运行代码时,只有N个url中的一个通过该函数进行处理。我认为这是因为处理函数是异步运行的。我如何将它们堆叠在队列中并运行它们/什么是更好的方法来执行此操作?
这是主要的JS代码:
var nodeio, linksOnPage, lyricsFromLink, db;
nodeio = require('node.io');
db = require('./db');
db.loadDB();
var loadSong = function(artist, title, lyrics){
console.log("loadSong being called");
var newSongObj = {};
newSongObj['artist'] = artist;
newSongObj['title'] = title;
newSongObj['lyrics'] = lyrics;
//store the lyrics in a mongo table
var newSong = new db.Song(newSongObj);
newSong.save(function(err) {
if(err){
throw err;
} else{
console.log("saved with no errors!");
}
});
};
// generic utility for getting links on a page and running a function on each one
exports.linksOnPage = function(pageObj, linkSelector, doOnPage, contentSelector) {
nodeio.scrape(function(){
this.getHtml(pageObj.pageUrl, function(err, $) {
var links = [];
var i = 0;
$(linkSelector).each(function(link) {
var fullLink = pageObj.rootUrl + link.attribs.href
links.push(fullLink);
//run a function on each link
console.log('getting lyrics for song: ', i);
doOnPage(pageObj.artist, fullLink, contentSelector);
i = i+1;
});
//this.emit(links);
});
});
}
// get the lyrics for a specific song
exports.lyricsFromLink = function(artist, pageUrl, lyricsSelector) {
nodeio.scrape(function(){
this.getHtml(pageUrl, function(err, $) {
var lyrics = "";
console.log('before each statement');
$(lyricsSelector).each(function(lyricParagraph) {
lyrics = lyrics + " " + lyricParagraph.text;
});
console.log('after each statement');
loadSong(artist, pageUrl, lyrics);
this.emit(lyrics)
});
});
}