我想使用CheerioJS将HTML表值解析为JSON对象。
我一直没能完全弄懂Cheerio API,目前只想出了一个相当繁琐的解决方案。
我正在尝试解析下面这段HTML:
<table summary="Account summary" class="accounts-table">
<thead>
<tr>
<th>Accounts</th>
<th>Total value</th>
<th>Available</th>
<th>Actions</th>
</tr>
</thead>
<tfoot>
<tr>
<td>
Total
</td>
<td>
£TOTALAMOUNT
</td>
<td>
£CASH
</td>
<td></td>
</tr>
</tfoot>
<tbody>
<tr>
<td style="white-space: normal">
<a href="https://awebsitehere.co.uk/account_summary/account/22" title="View your Stocks ISA"
class="product-name">
Stocks ISA
</a>
</td>
<td>
<a href="https://awebsitehere.co.uk/account_summary/account/22" title="View your Stocks ISA">
ISA-VAL
</a>
</td>
<td>
<a href="https://awebsitehere.co.uk/amount_available/account/22"
title="View cash summary for your Stocks ISA">
ISA-CASH
</a>
</td>
<td>
<a href="https://awebsitehere.co.uk/topup/account/22" title="Top up your Stocks ISA"
class="top-up-button">
Top up
</a>
<a href="https://awebsitehere.co.uk/topup/account/22l" title="Place a deal in your Stocks ISA"
class="deal-button">
</a>
</td>
</tr>
<tr>
<td style="white-space: normal">
<a href="https://awebsitehere.co.uk/account_summary/account/26" title="View your Junior ISA"
class="product-name">
Junior ISA
</a>
</td>
<td>
<a href="https://awebsitehere.co.uk/my-accounts/account_summary/account/26"
title="View your Junior ISA">
JUNIOR-VAL
</a>
</td>
<td>
<a href="https://awebsitehere.co.uk/my-accounts/amount_available/account/26"
title="View cash summary for your Junior ISA">
JUNIOR-CASH
</a>
</td>
<td>
</td>
</tr>
<tr>
<td style="white-space: normal">
<a href="https://awebsitehere.co.uk/my-accounts/account_summary/account/98"
title="View your Stocks Account" class="product-name">
Stocks Account
</a>
</td>
<td>
<a href="https://awebsitehere.co.uk/my-accounts/account_summary/account/98"
title="View your Stocks Account">
STOCKS-VAL
</a>
</td>
<td>
<a href="https://awebsitehere.co.uk/my-accounts/amount_available/account/98"
title="View cash summary for your stocks Account">
STOCKS-CASH
</a>
</td>
<td>
<a href="https://awebsitehere.co.uk/my-accounts/stock_and_fund_search/account/98/action/deal"
title="Place a deal in your stocks Account" class="deal-button">
<span style="padding-right:8px;">Deal</span>
</a>
</td>
</tr>
</tbody>
</table>
并将其转换为如下所示的JSON对象:
{
"accounts": {
"Stocks ISA": {
"investments":
"total value": isa-val,
"cash": isa-cash,
"link": "the href attached to this account"
},
"Junior ISA": {
"investments":
"total value": junior-val,
"cash": junior-cash,
"link": "the href attached to this account"
},
"stocks account": {
"investments":
"total value": stocks-val,
"cash": stocks-cash,
"link": "the href attached to this account"
}
}
}
这是我目前尝试过的代码,但我在遍历HTML时遇到了困难。
const $ = cheerio.load(body);

// Walk every body row of the accounts table, then every cell of that row.
$('table[class="accounts-table"] tbody tr').each((rowIndex, rowElement) => {
  // The element argument is used in place of `this`; Cheerio's .each()
  // passes the same node both ways, so no @ts-ignore is needed.
  const cells = $(rowElement).children();
  cells.each((cellIndex, cellElement) => {
    // Trimmed, concatenated text of the cell's child elements.
    // The value is computed but not used yet.
    const cellText = $(cellElement).children().text().trim();
  });
});
我将非常感谢能为我指出正确方向的人!
答案 0 :(得分:1)
这应该将html解析为您要寻找的结构:
// `body` is assumed to hold the HTML of the page containing the accounts table.
const $ = cheerio.load(body);

/**
 * Extracts one account from a row of the accounts table.
 *
 * The texts of the row's links are read in document order: the first is the
 * account name, the second the total value and the third the available cash.
 * Links after the third (e.g. action buttons) only matter as a fallback
 * source for the href.
 *
 * @param {object} elem - The `<tr>` element to parse.
 * @returns {{name: (string|undefined), value: object}} The account name
 *   (undefined when the row contains no links) and its summary. Missing
 *   amounts become "" and a missing link becomes null.
 */
function parseAccountRow(elem) {
  const linkTexts = [];
  let href = null;
  $("td a", elem).each((n, link) => {
    linkTexts.push($(link).text().trim());
    // Keep the first non-empty href found in the row.
    if (href === null) {
      href = link.attribs.href || null;
    }
  });
  // Defaults stop a short row from yielding the literal string "undefined".
  const [name, totalValue = "", cash = ""] = linkTexts;
  return {
    name,
    value: {
      "investments": "",
      "total value": totalValue.toLowerCase(),
      cash: cash.toLowerCase(),
      link: href,
    },
  };
}

const parsedObj = {};
// Class selector rather than [class="..."], so the table still matches if it
// ever carries more than one class.
$("table.accounts-table tbody tr").each((i, elem) => {
  const { name, value } = parseAccountRow(elem);
  // Skip rows without an account name instead of storing them under "undefined".
  if (name) {
    parsedObj[name] = value;
  }
});
console.log("Result:", parsedObj);
我得到的结果如下:
{
"Stocks ISA": {
"investments": "",
"total value": "isa-val",
"cash": "isa-cash",
"link": "https://awebsitehere.co.uk/account_summary/account/22"
},
"Junior ISA": {
"investments": "",
"total value": "junior-val",
"cash": "junior-cash",
"link": "https://awebsitehere.co.uk/account_summary/account/26"
},
"Stocks Account": {
"investments": "",
"total value": "stocks-val",
"cash": "stocks-cash",
"link": "https://awebsitehere.co.uk/my-accounts/account_summary/account/98"
}
}