// Reads the HTML file at `htmlPath`, parses it with htmlparser2 and pulls two
// pieces of text out of the top-level nodes of the resulting DOM:
//   primary   - text of the <p> found at children[1].children[1] of the node
//               whose class attribute is exactly 'offer-wrapper'
//   secondary - text of the <p> found at children[1] of the node whose class
//               attribute is exactly 'promo-banner'
// Either value stays undefined when the expected structure is not present.
fs.readFile(htmlPath, 'utf8', function(err, html) {
  if (err) {
    // Without this guard `html` is undefined and parsing would silently
    // run on a missing document.
    console.error('Failed to read ' + htmlPath + ':', err);
    return;
  }

  var htmlparser = require("htmlparser2");
  var primary, secondary;

  // True when the node carries a class attribute equal to `className`
  // (exact string match, not a class-list membership test).
  function classIs(node, className) {
    return node.attribs != null && node.attribs.class === className;
  }

  // Returns node.children[index], or undefined when the node is missing or
  // has no `children` (text/comment nodes may lack the property). The
  // original code dereferenced `.children[1]` on such nodes and could throw.
  function childAt(node, index) {
    if (node == null || node.children == null) {
      return undefined;
    }
    return node.children[index];
  }

  // Returns the trimmed text of the first child of a <p> node, or undefined
  // when `node` is not a <p> or its first child carries no text data.
  function paragraphText(node) {
    if (node == null || node.name !== 'p') {
      return undefined;
    }
    var first = childAt(node, 0);
    if (first == null || first.data === undefined) {
      return undefined;
    }
    return first.data.trim();
  }

  var handler = new htmlparser.DomHandler(function(error, dom) {
    if (error) {
      console.error('Failed to parse ' + htmlPath + ':', error);
      return;
    }

    for (var i = 0; i < dom.length; i++) {
      var node = dom[i];
      var text;

      // NOTE(review): the index 1 lookups assume index 0 is a whitespace text
      // node (a newline between tags, as in the sample markup). If the real
      // file is formatted differently these positions will not line up --
      // confirm against the htmlparser2 version and the actual input.
      if (classIs(node, 'offer-wrapper')) {
        text = paragraphText(childAt(childAt(node, 1), 1));
        if (text !== undefined) {
          primary = text;
        }
      } else if (classIs(node, 'promo-banner')) {
        text = paragraphText(childAt(node, 1));
        if (text !== undefined) {
          secondary = text;
        }
      }
    }
    // `primary` and `secondary` are scoped to the readFile callback; any code
    // that needs them has to run inside it, after the loop above.
  });

  var parser = new htmlparser.Parser(handler);
  parser.write(html);
  parser.end();
});
HTML文件可能是以下几种格式之一。格式1：
<div class=offer-wrapper>
<div class=offer>
<p>Content1</p>
</div>
</div>
<div class=promo-banner>
<p>Content 2</p>
</div>
或 格式2
<div class=promo-banner>
<p>Content 2</p>
</div>
或 格式3
<div class=offer-wrapper>
<div class=offer>
<p>Content1</p>
</div>
</div>
<div class=promo-banner>
</div>
当我尝试读取HTML文件内容时，我只能读取格式2和格式3的内容，而无法读取格式1的内容。
有人可以帮帮我吗?