使用beautifulsoup从脚本标签中抓取数据

时间:2020-07-07 02:47:58

标签: python beautifulsoup

我是 Python BeautifulSoup 库的新手,正试图从某个网站的图表中抓取数据。我发现所需的全部数据都位于一个 script 标签中,但不知道该如何抓取它们(请参阅所附的图像)。是否有办法使用 Python BeautifulSoup 从这个 script 标签中获取数据? script

2 个答案:

答案 0 :(得分:0)

这取决于网站本身、它使用的结构、使用的标签等。不过,下面这篇教程在我抓取网站时帮助很大: https://www.dataquest.io/blog/web-scraping-beautifulsoup/#:~:text=Using%20BeautifulSoup%20to%20parse%20the,creator%20from%20the%20package%20bs4%20

答案 1 :(得分:0)

此脚本将获取图表中的所有变量:

import json
import re  # required by re.findall below; the original snippet omitted this import

import requests


# Page whose chart data is embedded as JavaScript array variables
# (per the question, inside a <script> tag).
url = 'http://180.232.125.102/'
html_data = requests.get(url).text

# Scan the raw HTML text for assignments of the form
#     var <name> = [ ... ];
# where <name> starts with "system", "luzon" or "visayas".
# Group 1 is the variable name, group 2 is the array literal.
# No re.DOTALL flag is used, so each array must sit on a single line.
out = {}
for k, v in re.findall(r'var ((?:system|luzon|visayas).*?)\s+= (\[.*?\]);', html_data):
    # The captured array literal is parsed as JSON into a Python list.
    out[k] = json.loads(v)

# the data is in variable `out`, now just pretty print it to screen:
print(json.dumps(out, indent=4))

打印:

{
    "systemDemandVal": [
        10182.3,
        9911.2,
        9519.3,
        9302.5,
        9157.1,
        8948.4,
        9093.4,
        9995.2,
        10828.2,
        11391.4,
        11906.8,
        11632.8,
        11910,
        12295.7,
        12165.4,
        11767
    ],
    "systemRTDVal": [
        2075.25,
        2010.45,
        1767.63,
        1909.91,
        1859.16,
        1701.14,
        1706.64,
        1892.32,
        1712.98,
        1715.94,
        2052.51,
        1904.53,
        2057.63,
        2587.74,
        2582.84,
        2590.53
    ],
    "systemRTXVal": [
        2076.01,
        2001.47,
        1769.89,
        1808.5,
        1799.26,
        1701.01,
        1707.14,
        1713.88,
        1707.75,
        1752.76,
        2066.53,
        1802.88,
        2039.54,
        2587,
        2584.68
    ],
    "luzonDemandVal": [
        8684.2,
        8442,
        8103.1,
        7914.7,
        7772.3,
        7573,
        7677.9,
        8457.5,
        9152.1,
        9608.4,
        10021.5,
        9802.4,
        10040.6,
        10336.8,
        10234.5,
        9911.3
    ],
    "luzonRTDVal": [
        2075.25,
        2010.45,
        1767.63,
        1909.91,
        1859.16,
        1701.15,
        1706.64,
        1892.31,
        1712.98,
        1715.94,
        2052.5,
        1904.53,
        2057.63,
        2587.74,
        2582.84,
        2590.54
    ],
    "luzonRTXVal": [
        2076.01,
        2001.48,
        1769.89,
        1808.51,
        1799.26,
        1701.02,
        1707.14,
        1713.88,
        1707.75,
        1752.76,
        2066.53,
        1802.88,
        2039.54,
        2587,
        2584.68
    ],
    "visayasDemandVal": [
        1498.1,
        1469.2,
        1416.2,
        1387.8,
        1384.8,
        1375.4,
        1415.5,
        1537.7,
        1676.1,
        1783,
        1885.3,
        1830.4,
        1869.4,
        1958.9,
        1930.9,
        1855.7
    ],
    "visayasRTDVal": [
        2075.25,
        2010.46,
        1767.63,
        1909.91,
        1859.16,
        1701.12,
        1706.63,
        1892.33,
        1712.99,
        1715.94,
        2052.51,
        1904.53,
        2057.62,
        2587.74,
        2582.82,
        2590.51
    ],
    "visayasRTXVal": [
        2075.98,
        2001.47,
        1769.85,
        1808.49,
        1799.25,
        1701,
        1707.15,
        1713.86,
        1707.75,
        1752.77,
        2066.52,
        1802.88,
        2039.54,
        2587.03,
        2584.67
    ]
}