使用'node-htmlparser'模块时出现奇怪的错误

时间:2011-12-15 18:45:49

标签: javascript node.js html-parsing

我正在逐行阅读XML文件的庞然大物。使用htmlparser(v1.7.3),我试图获取最终将写入数据库的属性值。

以下是代码:

var fs = require('fs');
var sys = require('util');
var htmlparser = require('htmlparser');

function parseFoo(fileName) {
  var stream = fs.createReadStream(fileName);
  var buffer = "";

  stream.addListener('data', function(data) {
    buffer += data.toString();
    var parts = buffer.split('\n');
    parts.forEach(function(part, i) {
      record.buildRecord(part);
    })
    buffer = parts.pop(); // Add remaining text to the buffer.
  })
}

var record = (function() {
  var handler = new htmlparser.DefaultHandler(function (error, dom) {});
  var parser = new htmlparser.Parser(handler);
  return {
    'buildRecord': function(xml) {
      parser.parseComplete(xml);      

      // Only for testing.
      console.error(sys.inspect(handler.dom[0]['children'][0], false, null));
    }
  };
})();

parseFoo('foo.xml');

'foo.xml'(UTF-8编码)是微不足道的,它由如下所示的大量行组成:

<w a="1" b="2" c="3" d="4">x</w>

当程序到达某一行(1204)时,我收到错误:

D:\Geci\foo.js:25
    console.error(sys.inspect(handler.dom[0]['children'][0], false, null));

TypeError: Cannot read property '0' of undefined
    at Object.buildRecord (D:\Geci\foo.js:26:59)
    at D:\Geci\foo.js:14:14
    at Array.forEach (native)
    at [object Object].<anonymous> (D:\Geci\foo.js:13:11)
    at [object Object].emit (events.js:67:17)
    at [object Object]._emitData (fs.js:1149:10)
    at afterRead (fs.js:1131:10)
    at Object.wrapper [as oncomplete] (fs.js:254:17)

我正在运行Windows版本的node.js(0.6.5.1)。

这是htmlparser中的错误还是我只是一个node.js新手?

编辑:通过向stream添加'end'监听器来解决此错误,我会特别注意最后一行。加上在循环之前移动parts.pop()

0 个答案:

没有答案