Node.js HTMLParser迭代多次

时间:2014-11-22 00:35:09

标签: node.js

我使用http模块在Node.js中有这个简单的HTMLParser:

// Script from the question: sends a GET request to www.google.com:80/ and
// logs the text found between "<title>" and "</title>" in the response.
var http = require('http');
var options = {
  hostname: 'www.google.com',
  port: 80,
  path: '/',
  method: 'GET'
};

var req = http.request(options, function(res) {
  // Deliver chunks as UTF-8 strings rather than Buffers.
  res.setEncoding('utf8');
  // This handler runs for every 'data' event, so the extraction and the
  // console.log below execute once per received chunk.
  res.on('data', function (chunk) {  
    // indexOf returns -1 when "<title>" is not in this chunk; title1 + 7 is
    // then 6, so the substring starts at index 6 of the chunk.
    var title1 = chunk.indexOf("<title>");  
    // title2 is computed but never read.
    var title2 = chunk.indexOf("</title>"); 
    var titl = chunk.substring(title1 + 7);
    // When "</title>" is absent, indexOf is -1 and substring(0, -1) yields "".
    var result = titl.substring(0, titl.indexOf("</title>"));
    console.log("Title is : " + result);
  });
// NOTE(review): this req.end() is inside the response callback; req.end() is
// also called at the bottom of the script.
req.end();
});

// Report request-level failures.
req.on('error', function(e) {
  console.log('problem with request: ' + e.message);
});

// Finish sending the request.
req.end();

执行时,回调不止运行一次,所以我在命令行中得到了如下输出;输出的行数每次不同,但总是重复多次。

Title is: Google
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:

有任何帮助吗?提前谢谢!

1 个答案:

答案 0 :(得分:0)

当数据陆续传入时,data事件可能会多次触发。您需要将传入的数据块(即chunk)累积到您自己的缓冲区中,并在响应完成时再进行解析。这就是为什么它被称为chunk(块)——它只是部分数据。

// Request the page described by `options` and print its <title> text.
// The response body is accumulated across all 'data' events and parsed only
// once, after 'end', because a single chunk is just a fragment of the body.
var req = http.request(options, function(res) {
  res.setEncoding('utf8');
  var content = '';
  res.on('data', function (chunk) {
    content += chunk; // concatenate incoming data chunk to a response buffer
  });
  res.once('end', function() { // once the response has ended (it is complete)
    var openTag = "<title>";
    var closeTag = "</title>";
    var start = content.indexOf(openTag);
    // Look for the closing tag only after the opening one; without the -1
    // guard a missing <title> would make the substring start at index 6.
    var end = start === -1 ? -1 : content.indexOf(closeTag, start + openTag.length);
    if (end === -1) {
      console.log("Title not found");
      return;
    }
    var result = content.substring(start + openTag.length, end);
    console.log("Title is : " + result);
  });
});

您可能还希望在响应的end事件触发时移除data事件的处理程序:

// Named 'data' handler: appends each received piece to the `content`
// accumulator. It is named so the same function reference can be passed to
// removeListener later.
function storeChunk(piece) {
  content = content + piece;
}

res.on('data', storeChunk);

res.once('end', function () {
  // The response is complete, so detach the 'data' handler.
  res.removeListener('data', storeChunk);
  // ...
});