目前,我的代码从一个包含多个 XML 文档的文件创建流,并将其转换为单个 XML 文档。然后它把该字符串流传给 XML 解析器,使我可以从 XML 流中检索数据。我想把创建 XML 流的逻辑与 XML 解析器的逻辑分离开来,从而使我的应用程序模块化。以下是我的代码:
var fs = require('fs');
var path = require('path');
var split = require("split");
var util = require("util");
var Transform = require("stream").Transform;
var XmlStream = require('xml-stream');
var parseXml;
// Open the concatenated XML file that lives next to this script as a
// readable stream; it is later piped into the XML parser.
var xmlFilePath = path.join(__dirname, 'ipg140107.xml');
var streamXmlFile = fs.createReadStream(xmlFilePath);
/**
 * Transform stream that merges a file of concatenated XML documents into a
 * single well-formed document.
 *
 * Each incoming chunk is expected to be one line of text (for example the
 * output of split()); each outgoing chunk is a newline-terminated string.
 *
 * - The first two lines are passed through unchanged (presumably the XML
 *   declaration and DOCTYPE of the first document).
 * - A <week-of-patents> root element is opened just before the third line.
 * - From the third line on, any line containing an XML declaration or a
 *   DOCTYPE declaration is dropped.
 * - The root element is closed when the input ends.
 */
function CombineXmlDocs () {
  Transform.call(this, { "objectMode": true });
  // 1-based count of the lines received so far.
  this.currLine = 0;
}
util.inherits(CombineXmlDocs, Transform);

/**
 * Processes one input line.
 *
 * @param {string} line - A single line of the source file, without newline.
 * @param {string} encoding - Unused; required by the Transform contract.
 * @param {Function} processed - Callback signalling the line was handled.
 */
CombineXmlDocs.prototype._transform = function (line, encoding, processed) {
  this.currLine++;

  // Wrap everything after the two header lines in a synthetic root element.
  if (this.currLine === 3) {
    this.push('<week-of-patents>\n');
  }

  var isDeclaration = line.indexOf('<?xml') !== -1 ||
    line.indexOf('<!DOCTYPE') !== -1;

  // Keep the two header lines as they are; afterwards skip repeated declarations.
  if (this.currLine < 3 || !isDeclaration) {
    this.push(line + '\n');
  }

  processed(); // we're done processing the current line
};

/**
 * Closes the synthetic root element once all input has been consumed.
 * Without this the emitted document would end with an unclosed
 * <week-of-patents> element and would not be well-formed XML.
 *
 * @param {Function} cb - Callback signalling that flushing is complete.
 */
CombineXmlDocs.prototype._flush = function (cb) {
  // The root is only opened when a third line arrives, so only close it then.
  if (this.currLine >= 3) {
    this.push('</week-of-patents>\n');
  }
  cb();
};
// Break the file into lines, merge the concatenated documents, and hand the
// resulting stream to the XML parser.
var combinedXml = streamXmlFile.pipe(split()).pipe(new CombineXmlDocs());
parseXml = new XmlStream(combinedXml);

// Print the text of every <date> element once its end tag has been parsed.
function logDateText(date) {
  console.log(date.$text);
}
parseXml.on('endElement: date', logDateText);
答案 0 :(得分:0)
我创建了一个用于拼接 XML 文档的流并将其导出,从而实现了这一点。然后在我的 app.js 中 require 该文件,并把导出的流传给我的 XML 解析器。
app.js代码:
var createParsableXml = require('./createParsableXml.js');
var XmlParserStream = require('xml-stream');
// Wrap the exported, already-concatenated XML stream in an xml-stream parser.
var parseXml = new XmlParserStream(createParsableXml.streamConcatXml);

// Log the text of each <date> element once its closing tag has been parsed.
parseXml.on('endElement: date', function onDateEnd(date) {
  console.log(date.$text);
});
createParsableXml.js代码:
var fs = require('fs');
var path = require('path');
// allows for stream parsing on a line by line basis
var split = require('split');
var util = require('util');
var Transform = require('stream').Transform;
// Readable stream over the concatenated XML file located next to this module.
var xmlFilePath = path.join(__dirname, 'ipg140107.xml');
var streamXmlFile = fs.createReadStream(xmlFilePath);
/**
 * Transform stream that turns concatenated USPTO XML documents into one XML
 * document. Input chunks are expected to be single lines (e.g. produced by
 * split()); output chunks are newline-terminated strings.
 */
function CombineXmlDocs () {
  Transform.call(this, { "objectMode": true });
  // Number of lines received so far (1-based once a line is being handled).
  this.currLine = 0;
}
util.inherits(CombineXmlDocs, Transform);

/**
 * Logic for parsing concatenated uspto xml docs into one xml doc: removes xml
 * and doctype declarations except for the first instance, and adds a root
 * element to the doc.
 *
 * @param {string} line - One line of the source file, without its newline.
 * @param {string} encoding - Unused; part of the Transform contract.
 * @param {Function} processed - Callback signalling the line was handled.
 */
CombineXmlDocs.prototype._transform = function (line, encoding, processed) {
  this.currLine++;

  // The first two lines are forwarded as-is; the root element opens right
  // before the third line.
  if (this.currLine === 3) {
    this.push('<week-of-patents>\n');
  }

  var hasXmlDecl = line.indexOf('<?xml') !== -1;
  var hasDoctype = line.indexOf('<!DOCTYPE') !== -1;

  // After the two header lines, drop every repeated declaration line.
  if (this.currLine < 3 || !(hasXmlDecl || hasDoctype)) {
    this.push(line + '\n');
  }

  processed(); // we're done processing the current line
};

/**
 * Emits the closing tag of the root element when the input ends, so the
 * combined document is well-formed instead of ending with an unclosed
 * <week-of-patents> element.
 *
 * @param {Function} cb - Callback signalling that flushing is complete.
 */
CombineXmlDocs.prototype._flush = function (cb) {
  // Only close the root if it was opened, i.e. a third line was seen.
  if (this.currLine >= 3) {
    this.push('</week-of-patents>\n');
  }
  cb();
};
// Split the file into lines, merge the concatenated documents, and export the
// resulting stream for use by the parser module.
var lineStream = streamXmlFile.pipe(split());
exports.streamConcatXml = lineStream.pipe(new CombineXmlDocs());