我是 Python BeautifulSoup 库的新手,正试图从某网站的图表中抓取数据。我发现所需的全部数据都位于一个 script 标签中,但我不知道该如何抓取它们(请参阅所附的图像)。是否有办法使用 Python BeautifulSoup 从这个 script 标签中获取数据? script
答案 0 :(得分:0)
这取决于具体的网站、它使用的结构以及所用的标签等。不过,这篇教程在我抓取网站时对我有很大帮助:https://www.dataquest.io/blog/web-scraping-beautifulsoup/
答案 1 :(得分:0)
此脚本将获取图表中的所有变量:
import json
import re

import requests
# Matches JavaScript assignments of the form `var <name> = [ ... ];` where
# <name> starts with "system", "luzon" or "visayas".
# Group 1 captures the variable name, group 2 the array literal.
CHART_VAR_PATTERN = re.compile(
    r'var ((?:system|luzon|visayas).*?)\s+= (\[.*?\]);'
)


def extract_chart_variables(html_data):
    """Return the chart arrays embedded as JavaScript variables in a page.

    Args:
        html_data: Page source as text. It is searched for
            ``var <name> = [...];`` statements whose name starts with
            ``system``, ``luzon`` or ``visayas``.

    Returns:
        A dict mapping each matched variable name to its array decoded with
        ``json.loads``. Empty when nothing matches. If a name occurs more
        than once, the last occurrence wins.

    Raises:
        json.JSONDecodeError: If a matched array literal is not valid JSON.
    """
    return {
        name: json.loads(array_literal)
        for name, array_literal in CHART_VAR_PATTERN.findall(html_data)
    }


if __name__ == '__main__':
    url = 'http://180.232.125.102/'
    # Without a timeout `requests.get` may block indefinitely on an
    # unresponsive host; the status check stops an error page from being
    # parsed and silently producing an empty result.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    html_data = response.text

    out = extract_chart_variables(html_data)
    # the data is in variable `out`, now just pretty print it to screen:
    print(json.dumps(out, indent=4))
打印:
{
"systemDemandVal": [
10182.3,
9911.2,
9519.3,
9302.5,
9157.1,
8948.4,
9093.4,
9995.2,
10828.2,
11391.4,
11906.8,
11632.8,
11910,
12295.7,
12165.4,
11767
],
"systemRTDVal": [
2075.25,
2010.45,
1767.63,
1909.91,
1859.16,
1701.14,
1706.64,
1892.32,
1712.98,
1715.94,
2052.51,
1904.53,
2057.63,
2587.74,
2582.84,
2590.53
],
"systemRTXVal": [
2076.01,
2001.47,
1769.89,
1808.5,
1799.26,
1701.01,
1707.14,
1713.88,
1707.75,
1752.76,
2066.53,
1802.88,
2039.54,
2587,
2584.68
],
"luzonDemandVal": [
8684.2,
8442,
8103.1,
7914.7,
7772.3,
7573,
7677.9,
8457.5,
9152.1,
9608.4,
10021.5,
9802.4,
10040.6,
10336.8,
10234.5,
9911.3
],
"luzonRTDVal": [
2075.25,
2010.45,
1767.63,
1909.91,
1859.16,
1701.15,
1706.64,
1892.31,
1712.98,
1715.94,
2052.5,
1904.53,
2057.63,
2587.74,
2582.84,
2590.54
],
"luzonRTXVal": [
2076.01,
2001.48,
1769.89,
1808.51,
1799.26,
1701.02,
1707.14,
1713.88,
1707.75,
1752.76,
2066.53,
1802.88,
2039.54,
2587,
2584.68
],
"visayasDemandVal": [
1498.1,
1469.2,
1416.2,
1387.8,
1384.8,
1375.4,
1415.5,
1537.7,
1676.1,
1783,
1885.3,
1830.4,
1869.4,
1958.9,
1930.9,
1855.7
],
"visayasRTDVal": [
2075.25,
2010.46,
1767.63,
1909.91,
1859.16,
1701.12,
1706.63,
1892.33,
1712.99,
1715.94,
2052.51,
1904.53,
2057.62,
2587.74,
2582.82,
2590.51
],
"visayasRTXVal": [
2075.98,
2001.47,
1769.85,
1808.49,
1799.25,
1701,
1707.15,
1713.86,
1707.75,
1752.77,
2066.52,
1802.88,
2039.54,
2587.03,
2584.67
]
}