我正在尝试编写一个获取股票信息的脚本。但是,我只能从股票的概览(初始)页面检索到数据,而无法从关键统计信息(key statistics)页面检索到数据。这是我想要从中获取数据的页面: https://au.finance.yahoo.com/quote/TICKER/key-statistics?p=TICKER
以下是我正在使用的代码:(主要来自我观看的视频)
# -*- coding: utf-8 -*-
import csv
import os
import sys

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
import urllib3
import xlsxwriter
# Row labels to look up on a stock's main quote page.
key_stats_on_main = [
    'Market cap',
    'PE ratio (TTM)',
    'EPS (TTM)',
]

# Row labels to look up on a stock's key-statistics page.
key_stats_on_stat = [
    'Enterprise value',
    'Trailing P/E',
    'Forward P/E',
    'PEG Ratio (5-yr expected)',
    'Return on assets',
    'Quarterly revenue growth (yoy)',
    'EBITDA',
    'Diluted EPS (ttm)',
    'Total debt/equity (mrq)',
    'Current ratio (mrq)',
]
# Load the portfolio: each non-blank CSV line becomes one list of fields.
# The first field of every row is later used as the ticker symbol.
# The `with` block guarantees the file handle is closed even if reading fails;
# blank lines are skipped so they cannot turn into an empty ticker.
with open("/Users/z_hutcho/Documents/Programming/yfinance/stocks.csv", "r") as pfolio_file:
    stocks_arr = [
        line.strip().split(',')
        for line in pfolio_file
        if line.strip()
    ]
print(stocks_arr)
# Start Chrome through the chromedriver binary at a fixed local path, passing
# the headless and window-size command-line flags.
chrome_driver = "/usr/local/bin/chromedriver"
chrome_options = Options()
for _flag in ("--headless", "--window-size=2560x1600"):
    chrome_options.add_argument(_flag)
browser = webdriver.Chrome(executable_path=chrome_driver, options=chrome_options)
# Upper bound, in seconds, on how long to wait for a page's labels to appear.
_RENDER_TIMEOUT_S = 10


def _load_soup(driver, page_url, labels):
    """Open *page_url* in *driver* and return its <body> parsed as soup.

    Before the DOM is read, wait (up to ``_RENDER_TIMEOUT_S`` seconds) until at
    least one of *labels* occurs in the page source. If none shows up in time,
    the page is parsed as it is, so the caller simply gets "N/A" values.

    NOTE(review): the wait is a guess at why the key-statistics page yields no
    matches -- presumably its tables are filled in by JavaScript after the
    initial load. Confirm against the live page; if the labels themselves
    differ from ``key_stats_on_stat``, the lists need updating instead.
    """
    driver.get(page_url)
    try:
        WebDriverWait(driver, _RENDER_TIMEOUT_S).until(
            lambda d: any(label in d.page_source for label in labels)
        )
    except TimeoutException:
        # Best effort only: fall through and parse whatever has been rendered.
        pass
    body_html = driver.execute_script("return document.body.innerHTML")
    return BeautifulSoup(body_html, 'html.parser')


def _read_stat(soup, label):
    """Return the value displayed in the table row labelled *label*.

    The row is located via the text node equal to *label*. The value is taken
    from the second <span> of that row, falling back to the second <td>.
    Prints the value on success. When the label or the expected row layout is
    missing, prints a notice and returns "N/A".
    """
    try:
        # find() returns None for an unknown label -> AttributeError below.
        row = soup.find(text=label).find_parent('tr')
        try:
            value = row.find_all('span')[1].contents[1].get_text(strip=True)
        except (AttributeError, IndexError):
            value = row.find_all('td')[1].contents[0].get_text(strip=True)
    except (AttributeError, IndexError):
        print('Invalid parent for this element')
        return "N/A"
    print(value)
    return value


# One row per stock: [ticker, <main-page stats...>, <key-statistics stats...>]
stock_info_arr = []
try:
    for stock in stocks_arr:
        ticker = stock[0]
        url = "https://au.finance.yahoo.com/quote/{0}?p={0}".format(ticker)
        url2 = "https://au.finance.yahoo.com/quote/{0}/key-statistics?p={0}".format(ticker)

        stock_info = [ticker]
        for page_url, labels in ((url, key_stats_on_main), (url2, key_stats_on_stat)):
            soup = _load_soup(browser, page_url, labels)
            stock_info.extend([_read_stat(soup, label) for label in labels])
        stock_info_arr.append(stock_info)
finally:
    # Always shut the browser down so no Chrome/chromedriver process lingers.
    browser.quit()
print(stock_info_arr)
对于股票代码 AAPL,输出结果如下:
1.3T
24.52
11.89
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
不太确定为什么第二个页面没有被正确抓取……我对 BeautifulSoup 还不太熟悉。任何帮助都将不胜感激。