如何从 HTML 节点 <a> 获取属性值?

时间:2018-12-28 11:55:29

标签: javascript node.js

I am trying to scrape the website below and I am not getting the value of the attribute 'data-link'.

http://www.apptrace.com/itunes/charts/FRA/topfreeapplications/36/2018-12-27

获取属性值

有人可以帮我吗?

//尝试#1(错误)

// Attempt #1: launch headless Chromium, open the chart page, and try to collect
// the data-link attribute of every <a> inside #current_storefront_list.
// This version fails inside page.evaluate because the DOM calls are misused
// (see the notes on the individual lines below).
const puppeteer = require('puppeteer')

/**
 * Opens the chart page and resolves with whatever the in-page callback returns.
 * As written, the in-page callback is expected to throw, which rejects the
 * returned promise (and skips browser.close()).
 */
let scrape = async () => {
    const browser = await puppeteer.launch({headless: true})
    const page = await browser.newPage()
    await page.goto('http://www.apptrace.com/itunes/charts/USA/topfreeapplications/36')
    // Numeric argument: a fixed 1000 ms pause, not a wait for a specific element.
    await page.waitFor(1000)

    // The callback runs in the browser page context; its return value is
    // serialized and handed back to Node.
    const countryCharts = await page.evaluate(() => {

    const abbrAppsCountry = []
    // getElementById takes a bare id, not a CSS selector: with the leading '#'
    // it looks for an element whose id is literally "#current_storefront_list"
    // and returns null when there is none, so the next call throws a TypeError.
    document.getElementById('#current_storefront_list')
        // getAttribute expects an attribute name (not a selector) and returns
        // a string or null, neither of which has a forEach method.
        .getAttribute('li > a[data-link]')
        // Anchor elements have no `value` property; data-link is reachable via
        // dataset.link or getAttribute('data-link').
        .forEach(app => abbrAppsCountry.push(app.value))

        return abbrAppsCountry

    })

// close() returns a promise that is not awaited here.
browser.close()
return countryCharts
}

// No .catch(): a rejection from scrape() goes unhandled.
scrape().then((value) => {
    console.log(value)
})

//尝试#2(空数组)

// Attempt #2: same flow as before, but the anchors are selected with
// querySelectorAll. It runs without throwing, yet does not yield the
// data-link values.
const puppeteer = require('puppeteer')

/**
 * Opens the chart page and resolves with the array built by the in-page
 * callback.
 */
let scrape = async () => {
    const browser = await puppeteer.launch({headless: true})
    const page = await browser.newPage()
    await page.goto('http://www.apptrace.com/itunes/charts/USA/topfreeapplications/36')
    // Numeric argument: a fixed 1000 ms pause, not a wait for a specific element.
    await page.waitFor(1000)

    // The callback runs in the browser page context; its return value is
    // serialized and handed back to Node.
    const countryCharts = await page.evaluate(() => {

    const abbrAppsCountry = []
    // Selects <a data-link> elements that are children of <li> elements that are
    // themselves direct children of #current_storefront_list.
    // NOTE(review): an empty result means this selector matched nothing when the
    // callback ran -- possibly the list was not rendered yet, or the <li> items
    // are not direct children of the list; confirm against the live page.
    document.querySelectorAll('#current_storefront_list > li > a[data-link]')
        // Anchor elements have no `value` property, so even for matched nodes this
        // pushes undefined; data-link is reachable via dataset.link or
        // getAttribute('data-link').
        .forEach(app => abbrAppsCountry.push(app.value))

        return abbrAppsCountry

    })

// close() returns a promise that is not awaited here.
browser.close()
return countryCharts
}

// No .catch(): a rejection from scrape() goes unhandled.
scrape().then((value) => {
console.log(value)
})

我想获取国家名称的缩写。

1 个答案:

答案 0 :(得分:0)

您可以使用 dataset 或 getAttribute API:

// Fragment meant to run in the page context; it relies on an abbrAppsCountry
// array declared by the surrounding code.
// dataset exposes each data-* attribute as a camelCased property, so the
// data-link attribute is read as dataset.link (undefined when it is absent).
document.querySelectorAll('#current_storefront_list > li > a')
        .forEach(app => abbrAppsCountry.push(app.dataset.link))

或者:

// Fragment meant to run in the page context; it relies on an abbrAppsCountry
// array declared by the surrounding code.
// getAttribute returns the attribute's string value, or null when the element
// has no data-link attribute.
document.querySelectorAll('#current_storefront_list > li > a')
        .forEach(app => abbrAppsCountry.push(app.getAttribute('data-link')))