# Scrape the board-member panel of Bloomberg's Facebook quote page with
# Selenium and split its visible text into lines.

# Start a Chrome session; the argument is the path to the chromedriver
# executable (placeholder path in this snippet).
driver = webdriver.Chrome(r'XXXX\chromedriver.exe')
FB_bloomberg_URL = 'https://www.bloomberg.com/quote/FB:US'
driver.get(FB_bloomberg_URL)
# Absolute, position-based XPath to the panel. find_elements_* returns a list,
# so [0] takes the first match and raises IndexError when nothing matches.
# NOTE(review): the path depends on the exact page layout — confirm it still
# matches before relying on it.
board_members = driver.find_elements_by_xpath('//* [@id="root"]/div/div/section[3]/div[10]/div[1]/div[2]/div/div[2]')[0]
# .text holds only the text rendered for that element.
board=board_members.text
# The resulting list is not assigned to anything; it is only visible when this
# line is evaluated in an interactive session (REPL / notebook).
board.split('\n')
我编写了上面的代码,用于抓取彭博(Bloomberg)页面上 Facebook 的董事会信息。但是我无法提取全部董事会成员,因为其余成员隐藏在“查看更多”(View More)后面。如何提取所有成员的姓名?
感谢您的帮助。
答案 0 :(得分:0)
您可以完全使用 requests 来完成:先发起一次 GET 请求以获取所需的 cookie,再将其传递给 API。点击 “View More” 链接并查看网络流量时,可以在浏览器开发者工具的“网络”(Network)标签中找到该 API。
import requests
# Fetch the full list of Facebook board members from Bloomberg with plain HTTP
# requests instead of a browser.
#
# Flow:
#   1. GET the public quote page so the session's cookie jar is populated.
#   2. Copy the `_pxhd` cookie value into the request headers.
#   3. GET the JSON endpoint and collect every entry's 'name' under
#      'boardMembers'.
QUOTE_URL = 'https://www.bloomberg.com/quote/FB:US'
PEOPLE_API_URL = 'https://www.bloomberg.com/markets2/api/peopleForCompany/11092218'
# Seconds to wait on each request; without a timeout requests can block
# indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30

# Browser-like headers sent with the API call. 'cookie' is filled in below.
headers = {
    'dnt': '1',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'en-US,en;q=0.9',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
    'accept': '*/*',
    'referer': QUOTE_URL,
    'authority': 'www.bloomberg.com',
    'cookie': '',
}

with requests.Session() as s:
    # This request exists only to obtain cookies; its status is deliberately
    # not checked, matching the original best-effort behaviour.
    s.get(QUOTE_URL, timeout=REQUEST_TIMEOUT)

    # Raises KeyError if the site did not set `_pxhd`.
    # NOTE(review): the header is set to the bare cookie value, as in the
    # original; a Cookie header is normally `name=value` — confirm which form
    # the endpoint expects.
    headers['cookie'] = s.cookies.get_dict()['_pxhd']

    response = s.get(PEOPLE_API_URL, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail with a clear HTTPError on a 4xx/5xx reply instead of an opaque JSON
    # decoding error or KeyError further down.
    response.raise_for_status()
    r = response.json()

board_members = [item['name'] for item in r['boardMembers']]
print(board_members)