我正在逐行阅读XML文件的庞然大物。使用htmlparser(v1.7.3),我试图获取最终将写入数据库的属性值。
以下是代码:
var fs = require('fs');
var sys = require('util');
var htmlparser = require('htmlparser');
function parseFoo(fileName) {
var stream = fs.createReadStream(fileName);
var buffer = "";
stream.addListener('data', function(data) {
buffer += data.toString();
var parts = buffer.split('\n');
parts.forEach(function(part, i) {
record.buildRecord(part);
})
buffer = parts.pop(); // Add remaining text to the buffer.
})
}
var record = (function() {
var handler = new htmlparser.DefaultHandler(function (error, dom) {});
var parser = new htmlparser.Parser(handler);
return {
'buildRecord': function(xml) {
parser.parseComplete(xml);
// Only for testing.
console.error(sys.inspect(handler.dom[0]['children'][0], false, null));
}
};
})();
parseFoo('foo.xml');
'foo.xml'(UTF-8编码)是微不足道的,它由如下所示的大量行组成:
<w a="1" b="2" c="3" d="4">x</w>
当程序到达某一行(1204)时,我收到错误:
D:\Geci\foo.js:25
console.error(sys.inspect(handler.dom[0]['children'][0], false, null));
TypeError: Cannot read property '0' of undefined
at Object.buildRecord (D:\Geci\foo.js:26:59)
at D:\Geci\foo.js:14:14
at Array.forEach (native)
at [object Object].<anonymous> (D:\Geci\foo.js:13:11)
at [object Object].emit (events.js:67:17)
at [object Object]._emitData (fs.js:1149:10)
at afterRead (fs.js:1131:10)
at Object.wrapper [as oncomplete] (fs.js:254:17)
我正在运行Windows版本的node.js(0.6.5.1)。
这是htmlparser中的错误还是我只是一个node.js新手?
编辑:通过向stream
添加'end'监听器来解决此错误,我会特别注意最后一行。加上在循环之前移动parts.pop()
。