我在Windows 7上使用Python 3.
但是,我无法下载网页中列出的部分数据。网站如下:
http://data.tsci.com.cn/stock/00939/STK_Broker.htm
1453.IMC 98.28M 18.44M 4.32 5.33
1499.Optiver 70.91M 13.29M 3.12 5.34
7387.花旗环球 52.72M 9.84M 2.32 5.36
当我使用Google Chrome的“查看网页源代码”时,这些数据不会显示出来。但是,当我使用“检查”(Inspect)时,我能够看到这些数据:
'<th>1453.IMC</th>'
'<td>98.28M</td>'
'<td>18.44M</td>'
'<td>4.32</td>'
'<td>5.33</td>'
'<th>1499.Optiver </th>'
'<td> 70.91M</td>'
'<td>13.29M </td>'
'<td>3.12</td>'
'<td>5.34</td>'
请向我解释:数据是否隐藏在CSS样式表中?或者有没有办法检索上面列出的数据?
谢谢
此致 Crusier
from bs4 import BeautifulSoup
import urllib
import requests
stock_code = ('00939', '0001')


def web_scraper(stock_code):
    """Fetch each stock's broker page and print its buy/sell seat containers.

    For every code in *stock_code* the page
    ``http://data.tsci.com.cn/stock/<code>/STK_Broker.htm`` is downloaded and
    parsed, and the ``<div>`` elements with id ``BuyingSeats`` and
    ``SellSeats`` are printed.

    NOTE: only the static HTML returned by the server is parsed; anything the
    page fills in later with JavaScript will not appear in the printed output.

    Args:
        stock_code: Iterable of stock code strings, e.g. ``('00939',)``.

    Raises:
        requests.exceptions.RequestException: If a request fails or times out.
    """
    broker_url = 'http://data.tsci.com.cn/stock/'
    end_url = '/STK_Broker.htm'
    for code in stock_code:
        new_url = broker_url + code + end_url
        # A timeout keeps one unresponsive request from hanging the whole run.
        response = requests.get(new_url, timeout=10)
        soup = BeautifulSoup(response.content, "html.parser")
        buy_list = soup.find_all('div', id="BuyingSeats")
        sell_list = soup.find_all('div', id="SellSeats")
        print(buy_list)
        print(sell_list)


web_scraper(stock_code)
答案 0 :(得分:0)
正如已经提到的那样,Selenium是可行的方式。
from selenium import webdriver
from selenium.webdriver.common.by import By

# Open the broker page in a real browser so its JavaScript runs, then print
# the text of every row found under the element with id "Buylist".
broker_url = 'http://data.tsci.com.cn/stock/00939/STK_Broker.htm'

mydriver = webdriver.Chrome()
try:
    # Element lookups poll for up to 10 seconds instead of failing at once,
    # which gives script-generated rows time to appear.
    mydriver.implicitly_wait(10)
    mydriver.get(broker_url)
    # find_element(By..., ...) replaces the find_element_by_* helpers, which
    # were removed in Selenium 4.
    BuyList = mydriver.find_element(By.CSS_SELECTOR, '#Buylist')
    rows = BuyList.find_elements(By.TAG_NAME, 'tr')
    for row in rows:
        print(row.text)
finally:
    # Always close the browser and end the driver process, even on errors.
    mydriver.quit()
答案 1 :(得分:0)
数据是动态生成的,但您可以模仿ajax请求并以json格式获取:
import requests
# Query-string parameters for the RDS.aspx endpoint. "Code" is the stock code
# prefixed with "E". NOTE(review): the meaning of "PkgType" and "val" is not
# visible here; the values are kept as given.
params = {"Code": "E00939",
          "PkgType": "11036",
          "val": "50"}
# A timeout keeps the call from hanging on an unresponsive server.
response = requests.get("http://data.tsci.com.cn/RDS.aspx", params=params, timeout=10)
# Fail with a clear HTTP error instead of a JSON decoding error on an error page.
response.raise_for_status()
js = response.json()
print(js)
它为您提供了如下表格数据:
{u'BrokerBuy': [{u'AV': u'5.24',
u'BrokerNo': u'Optiver',
u'percent': u'10.09',
u'shares': u'43.06M',
u'turnover': u'225.67M'},
{u'AV': u'5.26',
u'BrokerNo': u'UBS HK',
u'percent': u'4.81',
u'shares': u'20.47M',
u'turnover': u'107.63M'},
{u'AV': u'5.22',
u'BrokerNo': u'\u4e2d\u94f6\u56fd\u9645',
u'percent': u'4.63',
u'shares': u'19.83M',
u'turnover': u'103.51M'},
{u'AV': u'5.25',
u'BrokerNo': u'\u745e\u4fe1',
u'percent': u'3.88',
u'shares': u'16.54M',
u'turnover': u'86.82M'},
{u'AV': u'5.24',
u'BrokerNo': u'IMC',
u'percent': u'3.84',
u'shares': u'16.38M',
u'turnover': u'85.89M'}],
u'BrokerSell': [{u'AV': u'5.21',
u'BrokerNo': u'\u4e2d\u6295\u4fe1\u606f',
u'percent': u'8.90',
u'shares': u'38.19M',
u'turnover': u'199.12M'},
{u'AV': u'5.24',
u'BrokerNo': u'Optiver',
u'percent': u'5.51',
u'shares': u'23.55M',
u'turnover': u'123.29M'},
{u'AV': u'5.24',
u'BrokerNo': u'\u9ad8\u76db\u4e9a\u6d32',
u'percent': u'4.43',
u'shares': u'18.91M',
u'turnover': u'99.19M'},
{u'AV': u'5.28',
u'BrokerNo': u'JPMorgan',
u'percent': u'2.28',
u'shares': u'9.67M',
u'turnover': u'51.09M'},
{u'AV': u'5.25',
u'BrokerNo': u'IMC',
u'percent': u'0.88',
u'shares': u'3.76M',
u'turnover': u'19.70M'}],
u'Buy': [{u'AV': u'5.24',
u'BrokerNo': u'1499.Optiver',
u'percent': u'10.09',
u'shares': u'43.06M',
u'turnover': u'225.67M'},
{u'AV': u'5.24',
u'BrokerNo': u'1453.IMC',
u'percent': u'3.84',
u'shares': u'16.38M',
u'turnover': u'85.89M'},
{u'AV': u'5.24',
u'BrokerNo': u'7387.\u82b1\u65d7\u73af\u7403',
u'percent': u'3.08',
u'shares': u'13.16M',
u'turnover': u'68.97M'},
{u'AV': u'5.23',
u'BrokerNo': u'6698.\u76c8\u900f\u8bc1\u5238',
u'percent': u'1.74',
u'shares': u'7.43M',
u'turnover': u'38.86M'},
{u'AV': u'5.21',
u'BrokerNo': u'1799.\u8000\u624d\u8bc1\u5238',
u'percent': u'1.44',
u'shares': u'6.18M',
u'turnover': u'32.16M'}],
u'NetBuy': [{u'AV': u'5.25',
u'BrokerNo': u'1499.Optiver',
u'percent': u'4.58',
u'shares': u'19.51M',
u'turnover': u'102.37M'},
{u'AV': u'5.24',
u'BrokerNo': u'1453.IMC',
u'percent': u'2.96',
u'shares': u'12.62M',
u'turnover': u'66.19M'},
{u'AV': u'5.24',
u'BrokerNo': u'7387.\u82b1\u65d7\u73af\u7403',
u'percent': u'2.81',
u'shares': u'11.98M',
u'turnover': u'62.78M'},
{u'AV': u'5.23',
u'BrokerNo': u'6698.\u76c8\u900f\u8bc1\u5238',
u'percent': u'1.66',
u'shares': u'7.12M',
u'turnover': u'37.24M'},
{u'AV': u'5.26',
u'BrokerNo': u'9065.UBS HK',
u'percent': u'1.39',
u'shares': u'5.91M',
u'turnover': u'31.11M'}],
u'NetNameBuy': [{u'AV': u'5.26',
u'BrokerNo': u'UBS HK',
u'percent': u'4.58',
u'shares': u'19.49M',
u'turnover': u'102.44M'},
{u'AV': u'5.25',
u'BrokerNo': u'Optiver',
u'percent': u'4.58',
u'shares': u'19.51M',
u'turnover': u'102.37M'},
{u'AV': u'5.22',
u'BrokerNo': u'\u4e2d\u94f6\u56fd\u9645',
u'percent': u'4.28',
u'shares': u'18.37M',
u'turnover': u'95.84M'},
{u'AV': u'5.24',
u'BrokerNo': u'\u745e\u4fe1',
u'percent': u'3.16',
u'shares': u'13.49M',
u'turnover': u'70.68M'},
{u'AV': u'5.24',
u'BrokerNo': u'IMC',
u'percent': u'2.96',
u'shares': u'12.62M',
u'turnover': u'66.19M'}],
u'NetNameSell': [{u'AV': u'5.29',
u'BrokerNo': u'\u5174\u4e1a\u91d1\u878d',
u'percent': u'0.37',
u'shares': u'1.58M',
u'turnover': u'8.36M'},
{u'AV': u'5.25',
u'BrokerNo': u'\u4e2d\u56fd\u91d1\u878d',
u'percent': u'0.16',
u'shares': u'696K',
u'turnover': u'3.65M'},
{u'AV': u'5.32',
u'BrokerNo': u'\u94f6\u6cb3\u56fd\u9645',
u'percent': u'0.16',
u'shares': u'671K',
u'turnover': u'3.57M'},
{u'AV': u'5.29',
u'BrokerNo': u'Penjing',
u'percent': u'0.07',
u'shares': u'300K',
u'turnover': u'1.59M'},
{u'AV': u'5.31',
u'BrokerNo': u'\u5efa\u94f6\u56fd\u9645',
u'percent': u'0.06',
u'shares': u'272K',
u'turnover': u'1.44M'}],
u'NetSell': [{u'AV': u'5.21',
u'BrokerNo': u'6999.\u4e2d\u6295\u4fe1\u606f',
u'percent': u'8.61',
u'shares': u'36.93M',
u'turnover': u'192.59M'},
{u'AV': u'5.24',
u'BrokerNo': u'3440.\u9ad8\u76db\u4e9a\u6d32',
u'percent': u'4.03',
u'shares': u'17.20M',
u'turnover': u'90.15M'},
{u'AV': u'5.30',
u'BrokerNo': u'5337.JPMorgan',
u'percent': u'0.67',
u'shares': u'2.83M',
u'turnover': u'15.00M'},
{u'AV': u'5.29',
u'BrokerNo': u'5980.\u5174\u4e1a\u91d1\u878d',
u'percent': u'0.37',
u'shares': u'1.58M',
u'turnover': u'8.36M'},
{u'AV': u'5.30',
u'BrokerNo': u'8738.\u6c47\u4e30\u8bc1\u5238',
u'percent': u'0.36',
u'shares': u'1.53M',
u'turnover': u'8.10M'}],
u'Sell': [{u'AV': u'5.21',
u'BrokerNo': u'6999.\u4e2d\u6295\u4fe1\u606f',
u'percent': u'8.90',
u'shares': u'38.19M',
u'turnover': u'199.12M'},
{u'AV': u'5.24',
u'BrokerNo': u'1499.Optiver',
u'percent': u'5.51',
u'shares': u'23.55M',
u'turnover': u'123.29M'},
{u'AV': u'5.24',
u'BrokerNo': u'3440.\u9ad8\u76db\u4e9a\u6d32',
u'percent': u'4.19',
u'shares': u'17.89M',
u'turnover': u'93.75M'},
{u'AV': u'5.25',
u'BrokerNo': u'1453.IMC',
u'percent': u'0.88',
u'shares': u'3.76M',
u'turnover': u'19.70M'},
{u'AV': u'5.30',
u'BrokerNo': u'5337.JPMorgan',
u'percent': u'0.70',
u'shares': u'2.96M',
u'turnover': u'15.66M'}],
u'Total': {u'In': u'1.26B',
u'Net': u'5.800971E+08',
u'Out': u'682.58M',
u'right': u'98.71'}}
其中包含所有表格数据,只需使用相应的键(key)即可访问您需要的内容。
所以在循环中,只需传递每个代码:
# Request the JSON tables for every stock code, reusing the same parameters.
for code in stock_code:
    # The endpoint rejects "0001" but accepts "00001", so left-pad each code
    # with zeros to five digits before adding the "E" prefix.
    params["Code"] = "E{}".format(code.zfill(5))
    # A timeout keeps one unresponsive request from hanging the whole loop.
    js = requests.get("http://data.tsci.com.cn/RDS.aspx", params=params, timeout=10).json()
    # js holds this code's tables; use it here, before the next iteration
    # overwrites it.
有一点需要注意:0001 无论在这里还是在你的浏览器(browser)中都不起作用,有效的代码是 00001。