我在 Node.js 中使用 http 模块写了这个简单的 HTML 解析器:
// Requests http://www.google.com/ and logs the text found between "<title>" and
// "</title>" in the response data.
var http = require('http');
// Request target: GET "/" on www.google.com, port 80.
var options = {
hostname: 'www.google.com',
port: 80,
path: '/',
method: 'GET'
};
var req = http.request(options, function(res) {
// Have 'data' events deliver strings decoded as UTF-8.
res.setEncoding('utf8');
// This handler runs once per 'data' event, so the title lookup below is applied
// to each chunk on its own and one line is logged for every chunk received.
res.on('data', function (chunk) {
var title1 = chunk.indexOf("<title>");
// NOTE(review): title2 is computed but never used below.
var title2 = chunk.indexOf("</title>");
// 7 is the length of "<title>", so this skips past the opening tag. When the
// chunk has no "<title>", indexOf returns -1 and the slice starts at index 6.
var titl = chunk.substring(title1 + 7);
// Keep everything before the closing tag. When "</title>" is absent the end
// index is -1, which substring treats as 0, so the result is "".
var result = titl.substring(0, titl.indexOf("</title>"));
console.log("Title is : " + result);
});
// NOTE(review): req.end() is also called at the bottom of the script; this call
// inside the response callback looks redundant - confirm before removing.
req.end();
});
// Log the message of any 'error' event emitted by the request.
req.on('error', function(e) {
console.log('problem with request: ' + e.message);
});
// Finish the request; no body is written before this call.
req.end();
执行时,回调不止运行一次,所以我在命令行中得到了下面的输出;输出的行数每次都不一样,但总是重复多次。
Title is: Google
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
Title is:
有任何帮助吗?提前谢谢!
答案 0 :(得分:0)
当数据传入时,`data` 事件可能会多次触发。您需要将每次传入的 Buffer(即 `chunk`)追加到您自己的缓冲区中,并在响应完成后再进行解析。这就是它被称为"块"(chunk)的原因——它只是部分数据。
/**
 * Extracts the text of the first <title> element from an HTML string.
 *
 * Matching is case-insensitive and tolerates attributes on the opening tag.
 *
 * @param {string} html - Complete HTML document text.
 * @returns {string} The title text, or '' when the document contains no
 *   complete <title>...</title> pair.
 */
function extractTitle(html) {
  var match = /<title(?:\s[^>]*)?>([\s\S]*?)<\/title>/i.exec(html);
  return match ? match[1] : '';
}

// Send the request, buffer the whole response body, and log the page title
// once the response has ended.
var req = http.request(options, function (res) {
  res.setEncoding('utf8');
  var content = '';
  res.on('data', function (chunk) {
    // A chunk is only part of the body; accumulate it and parse later.
    content += chunk;
  });
  res.once('end', function () {
    // The body is complete here, so the title is looked up exactly once.
    console.log("Title is : " + extractTitle(content));
  });
});
您可能还希望在响应的 `end` 事件处理程序中移除 `data` 事件的监听器:
// Appends one incoming chunk to the `content` accumulator.
function storeChunk(chunk) {
  content = content + chunk;
}

// Collect chunks while the response is streaming.
res.on('data', storeChunk);

res.once('end', function () {
  // The response is complete: detach the 'data' listener.
  res.removeListener('data', storeChunk);
  // ...
});