import requests
import pandas
from bs4 import BeautifulSoup
# Buffers for the scraped columns; nothing in this snippet fills them.
time = []
ticker = []
name = []

URL = 'https://stockbeep.com/52-week-high-stock-screener'

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors instead of parsing an error page.
page = requests.get(URL, timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.text, 'html.parser')

# Collect every <tr> carrying the "odd" / "even" CSS class from the fetched HTML.
odd_rows = soup.find_all('tr', {'class': 'odd'})
even_rows = soup.find_all('tr', {'class': 'even'})
print(odd_rows)
print(even_rows)
该网站每 5 秒更新一次。问题似乎在于:包含表格数据的 `<tbody>` 并没有出现在 soup 中,我不清楚原因。
答案 0 :(得分:2)
import requests
import bs4
import pandas as pd
def main(url: str) -> None:
    """Download the stock screener table and save it to ``Data.csv``.

    The page at *url* is fetched first to read the ``data-hash`` attribute of
    its table.  That hash is then sent, with the other query parameters, to
    the site's ``table-data`` endpoint, whose JSON response carries the rows
    under the ``"data"`` key.  The rows are loaded into a DataFrame, the
    ``xindex``, ``sscode``, ``ssarrow`` and ``cellClasses`` columns are
    dropped, and the result is printed and written to ``Data.csv``.

    Args:
        url: Address of the screener page whose HTML carries the table hash.

    Raises:
        requests.HTTPError: If either request returns an error status.
        ValueError: If the page has no ``<table>`` with a ``data-hash``
            attribute.
    """
    timeout = 30  # seconds; avoids hanging forever on a stalled connection

    # A single session is reused so both requests share cookies/connection.
    with requests.Session() as req:
        r = req.get(url, timeout=timeout)
        r.raise_for_status()
        soup = bs4.BeautifulSoup(r.text, 'lxml')

        # The hash is required by the data endpoint; fail with a clear
        # message instead of "'NoneType' object is not subscriptable".
        table = soup.find('table', {'data-hash': True})
        if table is None:
            raise ValueError(f"no <table data-hash=...> found at {url!r}")

        params = {
            "hash": table['data-hash'],
            "country": "us",
            "time-zone": "-120",
            "sort-column": "position",
            "sort-order": "desc",
            # NOTE(review): the original also listed a "_" parameter
            # (presumably a cache-busting timestamp) but left it disabled.
        }
        r = req.get(
            'https://stockbeep.com/table-data/52-week-high-stock-screener',
            params=params,
            timeout=timeout,
        )
        r.raise_for_status()

    df = pd.DataFrame(r.json()['data'])
    df = df.drop(columns=['xindex', 'sscode', 'ssarrow', 'cellClasses'])
    print(df)
    df.to_csv('Data.csv', index=False)
# Run the scraper only when executed as a script, so importing this module
# does not trigger network requests or write Data.csv.
if __name__ == "__main__":
    main('https://stockbeep.com/52-week-high-stock-screener')
输出:
sstime ssname sslast sshigh ... ssrvol ytd sscap position
0 15:36 Avis Budget 75.00 76.49 ... 3.1 101.0 5.18B New ATH
1 18:31 KLA 355.08 359.69 ... 4.9 37.1 53.5B New ATH
2 19:22 Alphabet 2,218.96 2,228.99 ... 7.4 26.6 1.44T New ATH
3 18:45 CSX 98.57 98.85 ... 3.5 8.6 73.9B New ATH
4 20:29 Target 205.35 205.80 ... 3.9 16.3 100B New ATH
.. ... ... ... ... ... ... ... ... ...
95 16:23 Verizon Comms 58.87 59.13 ... 4.3 0.2 241B 3M high
96 20:09 Dollar General 207.78 208.07 ... 4.3 -1.1 48.4B 3M high
97 17:04 Exelon 44.48 44.74 ... 4.9 5.3 42.9B 3M high
98 18:48 NiSource 24.43 24.52 ... 4.0 6.4 9.43B 3M high
99 15:35 United Parcel Service 171.95 173.04 ... 3.7 2.1 148B 3M high
[100 rows x 12 columns]