我有一些代码用360个产品解析XML文件,并从URL抓取图像,但在解析大型XML时遇到了一些麻烦。我的主要问题是我如何重新编写代码以便同步工作,所以我使用async.eachSeries
,但它没有帮助。
var fs = require('fs');
var request = require('request');
var parseString = require('xml2js').parseString;
var baseUrl = 'http://test.com/uploads/catalog_item_image_main/';
var async = require('async');
var processImg = require('./downloader');
async.waterfall([
function readFileSync(readFileSyncCallback) {
var xml = fs.readFileSync("./test.xml", "utf8");
readFileSyncCallback(null, xml);
},
function parse(xml, parseStringCallback) {
parseString(xml, function(err, result) {
if (err) parseStringCallback(err);
var products = result.product_list.product;
parseStringCallback(null, products);
})
},
function iterate(products, iterateCallback) {
console.log("PRODUCTS QNT - " + products.length);
async.eachSeries(products, function(prdt, callbackDone) {
// Perform operation on file here.
console.log('Processing file ' + prdt.sku);
var filename = prdt.sku + "";
filename = filename.replace(/\//g, '_');
processImg(baseUrl + filename + '_big.', filename, callbackDone());
}, function(err) {
// if any of the file processing produced an error, err would equal that error
if (err) {
// One of the iterations produced an error.
// All processing will now stop.
console.log('A prdt failed to process');
} else {
console.log('All prdt have been processed successfully');
}
});
}
], function(error) {
if (error) {
console.log(error);
}
});
然后我运行我的脚本,我看到所有360
console.log('Processing file ' + prdt.sku)
消息,然后我才看到消息下载了什么图像
console.log(uri + [extension] + " - downloaded");
以下是我的' ./下载程序' 模块中的代码:
/**
* Created by Gabriel on 6/27/2015.
*/
var fs = require('fs'),
request = require('request');
var processImg = function(uri, filename, callback) {
checkHead(uri + 'png', function(length) {
if (length > 2000) {
download(uri + 'png', filename + '.png', function() {
console.log(uri + 'png' + " - downloaded");
callback;
});
} else {
checkHead(uri + 'jpg', function(length) {
if (length > 2000) {
download(uri + 'jpg', filename + '.jpg', function() {
console.log(uri + 'jpg' + " - downloaded");
callback;
});
} else {
checkHead(uri + 'jpeg', function(length) {
if (length > 2000) {
download(uri + 'jpeg', filename + '.jpeg', function() {
console.log(uri + 'jpeg' + " - downloaded");
callback;
});
} else {
checkHead(uri, function(length) {
if (length > 2000) {
download(uri, filename, function() {
callback;
});
callback;
} else {
console.log(uri + " - DOWNLOAD ERRROR!!!!!!!!!!!")
callback;
}
});
}
});
}
});
}
});
};
var checkHead = function(uri, callback) {
request.head(uri, function(err, res, body) {
if (err) return console.log(err);
var length = res.headers['content-length'];
callback(length);
});
}
var download = function(uri, filename, callback) {
request(uri).pipe(fs.createWriteStream('./static/' + filename).on('close', callback));
};
module.exports = processImg;