如何使用CheerioJS选择表中的元素

时间:2019-09-25 08:56:25

标签: node.js typescript cheerio

我想使用CheerioJS将HTML表值解析为JSON对象。

我一直在努力理解Cheerio API,但只想出了一个相当繁琐的解决方案。

我正在尝试解析此HTML

    <table summary="Account summary" class="accounts-table">
    <thead>
        <tr>
            <th>Accounts</th>
            <th>Total value</th>
            <th>Available</th>
            <th>Actions</th>
        </tr>
    </thead>
    <tfoot>
        <tr>
            <td>
                Total
            </td>
            <td>
                £TOTALAMOUNT
            </td>
            <td>
                £CASH
            </td>
            <td></td>
        </tr>
    </tfoot>
    <tbody>

        <tr>
            <td style="white-space: normal">
                <a href="https://awebsitehere.co.uk/account_summary/account/22" title="View your Stocks ISA"
                    class="product-name">
                    Stocks ISA
                </a>
            </td>
            <td>
                <a href="https://awebsitehere.co.uk/account_summary/account/22" title="View your Stocks ISA">
                    ISA-VAL
                </a>
            </td>
            <td>
                <a href="https://awebsitehere.co.uk/amount_available/account/22"
                    title="View cash summary for your Stocks ISA">
                    ISA-CASH
                </a>
            </td>
            <td>
                <a href="https://awebsitehere.co.uk/topup/account/22" title="Top up your Stocks ISA"
                    class="top-up-button">
                    Top up
                </a>
                <a href="https://awebsitehere.co.uk/topup/account/22l" title="Place a deal in your Stocks  ISA"
                    class="deal-button">
                </a>
            </td>
        </tr>

        <tr>
            <td style="white-space: normal">
                <a href="https://awebsitehere.co.uk/account_summary/account/26" title="View your Junior ISA"
                    class="product-name">
                    Junior ISA
                </a>
            </td>
            <td>
                <a href="https://awebsitehere.co.uk/my-accounts/account_summary/account/26"
                    title="View your Junior ISA">
                    JUNIOR-VAL
                </a>
            </td>
            <td>
                <a href="https://awebsitehere.co.uk/my-accounts/amount_available/account/26"
                    title="View cash summary for your Junior ISA">
                    JUNIOR-CASH
                </a>
            </td>
            <td>
            </td>
        </tr>

        <tr>
            <td style="white-space: normal">
                <a href="https://awebsitehere.co.uk/my-accounts/account_summary/account/98"
                    title="View your Stocks Account" class="product-name">
                    Stocks Account
                </a>
            </td>
            <td>
                <a href="https://awebsitehere.co.uk/my-accounts/account_summary/account/98"
                    title="View your Stocks Account">
                    STOCKS-VAL
                </a>
            </td>
            <td>
                <a href="https://awebsitehere.co.uk/my-accounts/amount_available/account/98"
                    title="View cash summary for your stocks Account">
                    STOCKS-CASH
                </a>
            </td>
            <td>
                <a href="https://awebsitehere.co.uk/my-accounts/stock_and_fund_search/account/98/action/deal"
                    title="Place a deal in your stocks Account" class="deal-button">
                    <span style="padding-right:8px;">Deal</span>
                </a>
            </td>
        </tr>
    </tbody>
</table>

解析成如下所示的JSON对象:

{
    "accounts": {
        "Stocks ISA": {
            "investments":
            "total value": stocks-val
            "cash": stocks-cash,
            "link": "the href attached to this account"
        },
        "Junior ISA": {
            "investments":
            "total value": junior-val,
            "cash": junior-cash,
            "link": "the href attached to this account"
        },
        "stocks account": {
            "investments":
            "total value": stocks-val,
            "cash": stocks-cash,
            "link": "the href attached to this account"
        }
    }
}

这是我目前尝试过的代码,但我在遍历HTML时一直遇到困难。

// Load the fetched markup so it can be queried with jQuery-style selectors.
const $ = cheerio.load(body);

// Walk every body row of the accounts table, then every child of that row.
$('table[class="accounts-table"] tbody tr').each(function (rowIndex, rowElement) {
    //@ts-ignore
    const cells = $(this).children();

    cells.each(function (cellIndex, cellElement) {
        // Trimmed, concatenated text of whatever is nested inside this cell;
        // the value is computed but not used any further yet.
        //@ts-ignore
        const cellText = $(this).children().text().trim();
    });
});

我将非常感谢能为我指出正确方向的人!

1 个答案:

答案 0 :(得分:1)

下面的代码应该可以把HTML解析成您想要的结构:

// Build the cheerio query function used by the code below.
// NOTE(review): `body` is not defined in this snippet — presumably the HTML string of the page; confirm.
const $ = cheerio.load(body)

/**
 * Convert one row of the accounts table into a name/value pair.
 *
 * The trimmed texts of the row's links (`td a`) are collected in document
 * order: the first is used as the account name, the second as the total
 * value and the third as the cash amount (both lower-cased). The href of the
 * first link that has one becomes the account link.
 *
 * Relies on the surrounding `$` (the loaded cheerio instance).
 *
 * @param {object} elem - The row element, as passed by the `.each()` callback.
 * @returns {{name: (string|undefined), value: object}} `name` is undefined
 *   when the row contains no link; `value` holds the keys "investments",
 *   "total value", "cash" and "link".
 */
function parseAccountRow(elem) {
    const texts = [];
    let href = null;

    $("td a", elem).each((n, link) => {
        texts.push($(link).text().trim());
        // Keep the first href encountered; later links must not overwrite it.
        href = href || link.attribs.href;
    });

    // A row with fewer links than expected must yield "" for the missing
    // column instead of the literal string "undefined".
    const lowerOrEmpty = (text) => (typeof text === "string" ? text.toLowerCase() : "");

    return {
        name: texts[0],
        value: {
            "investments": "",
            "total value": lowerOrEmpty(texts[1]),
            cash: lowerOrEmpty(texts[2]),
            link: href
        }
    };
}

// Accumulates every account from the table, keyed by account name.
let parsedObj = {};

$('table[class="accounts-table"] tbody tr').each((rowIndex, rowElem) => {
    // Each body row yields one { name, value } pair.
    const { name, value } = parseAccountRow(rowElem);
    parsedObj[name] = value;
});

console.log("Result:", parsedObj);

我得到的结果如下:

{
    "Stocks ISA": {
        "investments": "",
        "total value": "isa-val",
        "cash": "isa-cash",
        "link": "https://awebsitehere.co.uk/account_summary/account/22"
    },
    "Junior ISA": {
        "investments": "",
        "total value": "junior-val",
        "cash": "junior-cash",
        "link": "https://awebsitehere.co.uk/account_summary/account/26"
    },
    "Stocks Account": {
        "investments": "",
        "total value": "stocks-val",
        "cash": "stocks-cash",
        "link": "https://awebsitehere.co.uk/my-accounts/account_summary/account/98"
    }
}