我正在尝试使用cheerio js
从此页面获取数据:
var html =
"<div class='clear'>" +
"<div class='time_head'>time_head content1</div>"
+ "<div class='blockfix'>blockfix1</div>"
+ "<div class='blockfix'>blockfix2</div>"
+ "<div class='time_head'>time_head content2</div>"
+ "<div class='blockfix'>blockfix3</div>"
+ "<div class='blockfix'>blockfix4</div>"
+ "<div class='blockfix'>blockfix5</div>"
+ "</div>";
这是我到目前为止尝试过的:
$ = cheerio.load(html);
let devtoList = [];
$('.clear').each(function (i, elem) {
devtoList[i] = {
title: $(this).find('.time_head').text(),
game: $(this).find('.blockfix').text()
};
});
const devtoListTrimmed = devtoList.filter(n => n != undefined);
console.log(devtoListTrimmed);
结果是:
[
{ title: 'time_head content1time_head content2',
game: 'blockfix1blockfix2blockfix3blockfix4blockfix5' }
]
但是我需要每个time_head
及其blockfix
TIME_HEAD CONTENT1
----blockfix1
----blockfix2
TIME_HEAD CONTENT2
----blockfix3
----blockfix4
请注意:
1- time_head
的数量总是变化
2-我愿意接受其他解决方案
答案 0 :(得分:1)
使用.time_head
获取所有元素,对其进行迭代,应用while循环,直到下一个元素具有类blockfix
。
const output = [];
$('.time_head').each(function(i) {
let next = $(this).next('.blockfix');
output.push({"title": $(this).text(), game: []});
while(next) {
output[i].game.push(next.text());
const isNext = $(next).next('.blockfix');
next = isNext.length > 0 ? $(next).next('.blockfix') : false;
}
});
console.log(output);
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<div class='clear'>
<div class='time_head'>time_head content1</div>
<div class='blockfix'>blockfix1</div>
<div class='blockfix'>blockfix2</div>
<div class='time_head'>time_head content2</div>
<div class='blockfix'>blockfix3</div>
<div class='blockfix'>blockfix4</div>
<div class='blockfix'>blockfix5</div>
</div>