我对Node的文件流解析感到困惑。这是我的代码:
// Streams xml/bigXML.xml from disk and passes each chunk to xml2js as soon as it arrives.
var fs = require('fs'),
xml2js = require('xml2js');
var parser = new xml2js.Parser();
var stream = fs.createReadStream('xml/bigXML.xml');
// Deliver chunks as UTF-8 strings.
stream.setEncoding('utf8');
stream.on('data', function(chunk){
// NOTE(review): every chunk is parsed on its own, as if it were a whole document, and
// `err` is never inspected, so a failed parse would go unreported here.
parser.parseString(chunk, function (err, result) {
console.dir(result);
console.log('Done');
});
});
// NOTE(review): the `chunk` parameter is declared but never used in this handler.
stream.on('end', function(chunk){
// The whole file has been read; do any follow-up work here.
console.log("IT'S OVER")
});
结果是……什么都没有发生,XML2JS/解析器完全没有任何输出。当我尝试console.log(chunk)
时,各个chunk
似乎只是按字节大小切分的,并没有按任何有意义的边界划分。其中一个“块”的输出是:
<?xml version="1.0" encoding="UTF-8"?>
<merchandiser xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="merchandiser.xsd">
<header><merchantId>1237</merchantId><merchantName>NORDSTROM.com</merchantName><createdOn>12/13/2013 23:50:57</createdOn></header>
<product product_id="52863929">// product info</product>
<product product_id="26537849">// product info</product>
<product product_id="25535647">// product info</product>
这个chunk里面有很多很多来自XML的<product>
条目。块会在某个<product>
条目的中间结束,下一个块则从上一个块中断的位置继续。
主要问题是如何让createReadStream
从<product
开始,到</product>
结束时输出块?
编辑:为了便于得到正确的输出,下面是从文件开头到第一个<product>
结束的XML:
<?xml version="1.0" encoding="UTF-8" ?>
<merchandiser xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="merchandiser.xsd">
<header>
<merchantId>1237</merchantId>
<merchantName>NORDSTROM.com</merchantName>
<createdOn>12/13/2013 23:50:57</createdOn>
</header>
<product product_id="52863929" name="Teva 'Psyclone' Print Sandal (Baby, Walker & Toddler) Camo/ Dark Olive 6 M" sku_number="52863929" manufacturer_name="Teva" part_number="1001701">
<category>
<primary>Toddler Unisex</primary>
<secondary>Shoes~~Sandals/Slides</secondary>
</category>
<URL>
<product>http://click.linksynergy.com/link?id=LUyP0GcLCGc&offerid=276223.52863929&type=15&murl=http%3A%2F%2Fshop.nordstrom.com%2FS%2F3297406%3Fcm_cat%3Ddatafeed%26cm_pla%3Dshoes%3Asandals%252fslides%26cm_ite%3Dteva_%2527psyclone%2527_print_sandal_%2528baby%252c_walker_%2526_toddler%2529%3A503158_1%26cm_ven%3DLinkshare</product>
<productImage>http://content.nordstrom.com/imagegallery/store/product/large/0/_6880020.jpg</productImage>
<buy></buy>
</URL>
<description>
<short>Rugged construction and stylish good looks define a sporty sandal, with the added convenience and security of hook-and-loop closures across the toe and at the instep.Rugged construction and stylish good looks define a sporty sandal, with the added
convenience and security of h...</short>
<long>Rugged construction and stylish good looks define a sporty sandal, with the added convenience and security of hook-and-loop closures across the toe and at the instep.Rugged construction and stylish good looks define a sporty sandal, with the added
convenience and security of hook-and-loop closures across the toe and at the instep. Color(s): camo/ dark olive, daisy blue. Brand: Teva. Style Name: Teva 'Psyclone' Print Sandal (Baby, Walker & Toddler). Style Number: 503158_1.</long>
</description>
<discount currency="USD">
<amount></amount>
<type>amount</type>
</discount>
<price currency="USD">
<sale begin_date="" end_date="">24.95</sale>
<retail>24.95</retail>
</price>
<brand>Teva</brand>
<shipping>
<cost currency="USD">
<amount>0.00</amount>
<currency>USD</currency>
</cost>
<information></information>
<availability>Y</availability>
</shipping>
<keywords></keywords>
<upc>737872649135</upc>
<m1>503158_1.</m1>
<pixel>http://ad.linksynergy.com/fs-bin/show?id=LUyP0GcLCGc&bids=276223.52863929&type=15&subid=0</pixel>
<attributeClass class_id="60">
<Misc></Misc>
<Product_Type>Shoes</Product_Type>
<Size>6 M</Size>
<Material></Material>
<Color>CAMO/ DARK OLIVE</Color>
<Gender>Unisex</Gender>
<Style></Style>
<Age></Age>
</attributeClass>
</product>
答案 0 :(得分:8)
您有两种方法可以解决您的问题。
如wethat所述,XML2JS在解析数据之前需要完整的XML内容。但您手里的是一个文件流,它按块逐段传输数据。第一个解决方案是将此数据流转换为一个完整的大缓冲区,然后将其发送到XML2JS。为此,您可以使用stream-to
package(npm i stream-to
)将文件流转换为缓冲区数组,然后使用Buffer.concat
将其连接成一个缓冲区,如下所示:
// Buffer the entire file in memory, then hand the whole document to xml2js in one call.
var fs = require('fs')
var streamTo = require('stream-to')
var xml2js = require('xml2js')

// Prints the parsed <product> entries, or the error message if parsing failed.
function onParsed (parseErr, doc) {
  if (parseErr) {
    console.log(parseErr.message)
    return
  }
  console.log(doc.merchandiser.product)
}

// Joins the collected chunks into a single Buffer and parses it as one XML document.
function onCollected (readErr, chunks) {
  if (readErr) {
    console.log(readErr.message)
    return
  }
  var wholeFile = Buffer.concat(chunks)
  var xmlParser = new xml2js.Parser()
  xmlParser.parseString(wholeFile, onParsed)
}

streamTo.array(fs.createReadStream('input.xml'), onCollected)
这很好用,但由于它需要将整个文件保存到内存中,如果输入文件非常大,它将无法工作。要处理非常大的文件,您需要使用流式XML解析器,例如sax
。不过sax
不会创建Javascript对象;它是一个EventEmitter,使用起来稍微麻烦一些,因为您必须处理所有相关事件才能动态构建对象。
您可以使用SaXPath library,它支持XPath语法的一小部分。每次匹配XPath模式时,此库都会发出match
事件。这是一个例子:
// Stream the file through sax and print every fragment matched by the XPath pattern.
var saxpath = require('saxpath')
var fs = require('fs')
var sax = require('sax')

var XML_FILE = 'input.xml'
var PRODUCT_XPATH = '/merchandiser/product'

// Logs one matched fragment as delivered by the 'match' event.
function printMatch (fragment) {
  console.log(fragment)
}

var strictSaxStream = sax.createStream(true)
var productMatcher = new saxpath.SaXPath(strictSaxStream, PRODUCT_XPATH)
productMatcher.on('match', printMatch)

fs.createReadStream(XML_FILE).pipe(strictSaxStream)
您有两个选择:
xml2js
一次解析一个产品
答案 1 :(得分:0)
xml2js需要一次性载入完整的XML。
这种情况下请使用sax,它是一个流式解析器:
//安装
npm install sax
//此代码用于打印所有product_id
var fs = require('fs');
var sax = require('sax');

// createStream() without arguments runs sax in non-strict mode, where tag and attribute
// names are reported in upper case; hence the 'PRODUCT' / 'PRODUCT_ID' spellings below.
var saxStream = sax.createStream();

// Print the product_id of every <product> element that carries one.
saxStream.onopentag = function (node) {
  if (node.name !== 'PRODUCT') {
    return;
  }
  // The feed also nests a <product> element (holding a link) inside <URL>; it has no
  // product_id attribute, so skip it instead of printing "undefined".
  var productId = node.attributes.PRODUCT_ID;
  if (productId === undefined) {
    return;
  }
  console.log(productId);
};

fs.createReadStream('xml/bigXML.xml').pipe(saxStream);
输出:
52863929
26537849
25535647