我是 Node.js 的新手。我创建了一个名为 events_scraper.js 的文件,并将以下代码放在该文件中:
var request = require('request');
var cheerio = require('cheerio');
var fs = require('fs');
// Region names to crawl; each one is substituted for the `{{region}}`
// placeholder in both `basePath` and `result`.
var regions = ['campania','molise','puglia','basilicata','sicilia','sardegna'];
// Site origin, prepended to the event hrefs found on listing pages.
var domain = 'http://www.eventiesagre.it';
// Listing-page URL template; `{{region}}` is replaced per region before crawling.
var basePath = 'http://www.eventiesagre.it/cerca/eventi/sagre/maggio/{{region}}/prov/cit/intit/rilib';
// Output file path template; one file per region, one JSON object per line.
var result = 'path_to_folder{{region}}.json';
//start of scraper
/**
 * Fetches a single event detail page, extracts the event fields from its
 * markup and appends them as one JSON line to the region's output file
 * (the `result` template with `{{region}}` replaced). The extracted event is
 * also echoed to the console.
 *
 * Failures (network error, non-200 status, file append error) are logged and
 * the event is skipped; nothing is thrown to the caller.
 *
 * @param {string} path - URL of the event detail page.
 * @param {string} region - Region name used to pick the output file.
 */
function getData(path, region) {
  request(path, function (error, response, html) {
    if (error) {
      console.error('Request failed for ' + path + ':', error);
      return;
    }
    if (response.statusCode !== 200) {
      console.error('Unexpected status ' + response.statusCode + ' for ' + path);
      return;
    }

    var $ = cheerio.load(html);

    // Basic event fields read straight from the page markup.
    var evt = {
      categoria: $('.category').text().trim().replace( /\s\s+/g, ' '),
      titolo: $('.summary').text(),
      sottotitolo: $('.titolo').siblings('.testoxxsmall').text(),
      dal: $('.dtstart').text(),
      al: $('.dtend').text(),
      tel: $('[alt="info evento"]').parent().next().text()
    };

    // The e-mail link sits next to the "mail" icon. Guard against an anchor
    // without an href, which would otherwise throw on `.split`.
    var email = $('[src="/template/originalBlu/images/comuni/mail - at.gif"]').siblings('a').first();
    var mailHref = email.attr('href');
    if (typeof mailHref === 'string') {
      evt.email = mailHref.split('mailto:')[1];
    }

    // Address: drop the label and region nodes, then collapse whitespace.
    var adr = $('.location .adr ');
    adr.find('.testo10').remove();
    adr.find('.region').remove();
    evt.dove = adr.text().trim().replace( /\s\s+/g, ' ');

    // Website links next to the "www" icon: an array when there are several,
    // a single object otherwise (including when there are none).
    var linkSito = $('[src="/template/originalBlu/images/comuni/sito - www.gif"]').siblings('a');
    if (linkSito.length > 1) {
      evt.sito = [];
      linkSito.each(function (i, sito) {
        evt.sito.push({
          url: $(sito).attr('href'),
          nome: $(sito).text()
        });
      });
    } else {
      evt.sito = {
        url: linkSito.attr('href'),
        nome: linkSito.text()
      };
    }

    fs.appendFile(result.replace('{{region}}', region), JSON.stringify(evt) + '\n', function (err) {
      if (err) {
        console.error(err);
      }
    });
    console.log(evt);
  });
}
/**
 * Fetches one listing page, starts a `getData` scrape for every event entry
 * (`.vevent`) found on it, then follows the "Avanti" (next page) link, if
 * any, by calling itself again.
 *
 * Failures (network error, non-200 status) are logged and end the crawl for
 * this page chain; nothing is thrown to the caller.
 *
 * @param {string} path - URL of the listing page to crawl.
 * @param {string} region - Region name, forwarded to `getData`.
 */
function getStuff(path, region) {
  request(path, function (error, response, html) {
    if (error) {
      console.error('Request failed for ' + path + ':', error);
      return;
    }
    if (response.statusCode !== 200) {
      console.error('Unexpected status ' + response.statusCode + ' for ' + path);
      return;
    }

    var $ = cheerio.load(html);

    $('.vevent').each(function (i, element) {
      var href = $(element).find('.summary').attr('href');
      // Skip entries without a detail link; concatenating `undefined` would
      // otherwise produce a bogus URL ending in "undefined".
      if (!href) {
        return;
      }
      getData(domain + href, region);
    });

    // NOTE(review): the href is used as-is, so it is assumed to be an
    // absolute URL — confirm against the site's pagination markup.
    var next = $('.elencoNav a:contains(Avanti)').first().attr('href');
    if (next) {
      getStuff(next, region);
    }
  });
}
// Entry point: for every region, reset its output file and start crawling
// the first listing page.
regions.forEach(function (region) {
  // Truncate synchronously: `fs.writeFile` without a callback throws on
  // current Node versions, and an asynchronous truncation is not ordered
  // against the `appendFile` calls issued later by the scraper.
  fs.writeFileSync(result.replace('{{region}}', region), '');
  getStuff(basePath.replace('{{region}}', region), region);
});
然后我运行该应用程序,收到了以下错误:
SyntaxError: Unexpected identifier
at Object.exports.createScript (vm.js:44:10)
at REPLServer.defaultEval (repl.js:117:23)
at bound (domain.js:254:14)
at REPLServer.runBound [as eval] (domain.js:267:12)
at REPLServer.<anonymous> (repl.js:279:12)
at REPLServer.emit (events.js:107:17)
at REPLServer.Interface._onLine (readline.js:214:10)
at REPLServer.Interface._line (readline.js:553:8)
at REPLServer.Interface._ttyWrite (readline.js:830:14)
at ReadStream.onkeypress (readline.js:109:10)
这个错误是由第一行代码 var request = require('request'); 引起的。
答案 0:(得分:0)
您似乎是运行脚本的方式有误。从堆栈跟踪中的 REPLServer 可以看出,命令是在 Node 的交互式 REPL 中输入的。请退出 REPL,在系统终端中运行 node index.js。如果您的文件名不是 index.js,请将其替换为实际的文件名(例如 node events_scraper.js)。