我想使用JavaScript只将已爬网的网址存储在数组中。这是我的代码:
var Crawler = require("js-crawler");
var json2csv = require('json2csv');
var fs = require('fs');
var page= require('pa11y');
// Crawl http://www.nature.com/ with js-crawler (configured with depth: 2),
// log every crawled URL, and once crawling is done try to save the URL list
// to a CSV file.
// The callbacks are passed positionally: onSuccess, null, onAllFinished.
// NOTE(review): the null slot is presumably the failure callback; confirm
// against the js-crawler documentation.
new Crawler().configure({depth: 2})
.crawl("http://www.nature.com/", function onSuccess(page) {
// The `page` parameter shadows the outer `page` variable (the pa11y import)
// inside this callback.
console.log('crawl:'+page.url);
}, null, function onAllFinished(crawledUrls) {
console.log('All crawling finished');
// Concatenating an array to a string joins its elements with commas, which
// is why the console shows all URLs on one comma-separated line.
console.log('crawl:' +crawledUrls);
// Wrap every URL in a record of the form {url: <string>}.
var crawled = [];
for (var i = 0; i < crawledUrls.length; i++)
{
crawled.push({
'url': crawledUrls[i]
});
}
// storeCSV is a function declaration inside this callback, so it is hoisted
// and can be called before the line where it is written.
// NOTE(review): `crawled` is an array of plain objects and is handed to
// fs.writeFile below without being serialized; json2csv is required at the
// top of the script but never called here. The objects get stringified as
// "[object Object]" instead of being written as CSV rows.
storeCSV(crawled);
console.log(crawled);
// Write the given data to the hard-coded crawl.csv path and log on success.
// storeJSArray is passed to fs.writeFile exactly as received.
function storeCSV(storeJSArray) {
fs.writeFile('C:/Users/Desktop/dashboard-master/data/crawl.csv', storeJSArray, function(err) {
// A write error is rethrown from inside the asynchronous callback.
if (err) throw err;
console.log('File saved!');
});
}
});
问题是,写入 crawl.csv 文件的内容如下:
[object Object][object Object][object Object]
我希望输出为
crawl:http://www.nature.com/
All crawling finished
crawl:http://www.nature.com/
C:\Users\Desktop\dashboard-master\config>node crawler.js
crawl:http://www.nature.com/
crawl:http://authorservices.springernature.com/scientific-editing/?utm_source=na
tureAuthors&utm_medium=referral&utm_campaign=natureAuthors
crawl:http://authorservices.springernature.com/language-editing/?utm_source=natu
reAuthors&utm_medium=referral&utm_campaign=natureAuthors
crawl:http://www.nature.com/search?article_type=research-highlights&order=da
te_desc
All crawling finished
crawl:http://www.nature.com/,http://authorservices.springernature.com/scientific
-editing/?utm_source=natureAuthors&utm_medium=referral&utm_campaign=natu
reAuthors,http://authorservices.springernature.com/language-editing/?utm_source=
natureAuthors&utm_medium=referral&utm_campaign=natureAuthors,http://www.
nature.com/search?article_type=research-highlights&order=date_desc
我遗漏了什么?如有任何建议,不胜感激。
答案 0 :(得分:0)
您已经引入了 json2csv,但并没有使用它。您把由对象组成的 crawled 数组直接传给了 fs.writeFile,而 fs.writeFile 会对该数据调用 .toString(),于是数组中的每个对象都被转换成了字符串 [object Object]。
请加入下面这段代码,先用 json2csv 把数组转换成 CSV 字符串,再把该字符串交给 storeCSV:
// Build the json2csv options: the {url: ...} records as input data, with
// 'url' as the only requested field.
var csvOptions = { data: crawled, fields: ['url'] };
// Pass the CSV text produced by json2csv (not the raw array) to storeCSV.
storeCSV(json2csv(csvOptions));