我试图用BeautifulSoup抓一个网站。更具体地说,我试图从以下标签中获取字符串:
<td class="Fz(s) Fw(500) Ta(end)" data-reactid=".17c0h26fqwq.1.$0.0.0.3.1.$main-0-Quote-Proxy.$main 0-Quote.2.0.0.0.1.0.0:$VALUATION_MEASURES.0.1.0.$MARKET_CAP_INTRADAY.1">4.39B</td>
但是,当我尝试寻找所有td标签的attrs时,BeautifulSoup无法找到我想要的那个。这是代码:
from urllib.request import urlopen

from bs4 import BeautifulSoup

# Download the HTML exactly as the server returns it; urlopen only fetches
# the response body and does not run any of the page's JavaScript.
with urlopen('http://finance.yahoo.com/quote/IONS/key-statistics?p=IONS') as response:
    source_code = response.read()

yahoo_finance = BeautifulSoup(source_code, 'html.parser')

# Print the attribute dict of every <td> element present in that HTML.
tds = yahoo_finance.find_all('td')
for td in tds:
    print(td.attrs)
这是输出:
{'class': ['W(100%)', 'Va(t)', 'Px(0)'], 'data-reactid': '.odbtogw33w.0.0.$uh.2.0.1.0.1.0.0.0'}
{'class': ['Va(t)', 'Tren(os)', 'W(10%)', 'Whs(nw)', 'Px(0)', 'Bdcl(s)'], 'data-reactid': '.odbtogw33w.0.0.$uh.2.0.1.0.1.0.0.1'}
所以,它没有找到 'class': ['Fz(s)', 'Fw(500)', 'Ta(end)'] 这个标签。
有谁知道为什么?
戈兰
答案 0 :(得分:3)
您只需使用 requests 库就可以获取这些数据;页面内容是通过 ajax 请求生成的,该请求的地址是 https://query1.finance.yahoo.com/v10/finance/quoteSummary/IONS :
from pprint import pprint as pp

import requests

# Query-string parameters sent to the quoteSummary endpoint. "modules" names
# the sections requested; they show up as the top-level keys of the result.
params = {
    "formatted": "true",
    "lang": "en-US",
    "region": "US",
    "modules": "defaultKeyStatistics,financialData,calendarEvents",
    "corsDomain": "finance.yahoo.com",
}
url = "http://finance.yahoo.com/quote/IONS/key-statistics?p=IONS"
ajax = "https://query1.finance.yahoo.com/v10/finance/quoteSummary/IONS"

with requests.Session() as s:
    # NOTE(review): this request is made with requests.get, not the session
    # `s`, and `cont` is never read afterwards. Presumably it was meant to
    # visit the page before calling the API -- confirm whether it is needed.
    cont = requests.get(url).content
    # Fetch the JSON document directly from the ajax endpoint and decode it.
    data = s.get(ajax, params=params).json()
    pp(data[u'quoteSummary']["result"])
这会给你:
[{u'calendarEvents': {u'dividendDate': {},
u'earnings': {u'earningsAverage': {u'fmt': u'-0.53',
u'raw': -0.53},
u'earningsDate': [{u'fmt': u'2016-08-09',
u'raw': 1470700800}],
u'earningsHigh': {u'fmt': u'-0.39',
u'raw': -0.39},
u'earningsLow': {u'fmt': u'-0.75',
u'raw': -0.75},
u'revenueAverage': {u'fmt': u'37.69M',
u'longFmt': u'37,690,000',
u'raw': 37690000},
u'revenueHigh': {u'fmt': u'56M',
u'longFmt': u'56,000,000',
u'raw': 56000000},
u'revenueLow': {u'fmt': u'25.2M',
u'longFmt': u'25,200,000',
u'raw': 25200000}},
u'exDividendDate': {},
u'maxAge': 1},
u'defaultKeyStatistics': {u'52WeekChange': {u'fmt': u'\u221e%',
u'raw': u'Infinity'},
u'SandP52WeekChange': {u'fmt': u'3.65%',
u'raw': 0.03645599},
u'annualHoldingsTurnover': {},
u'annualReportExpenseRatio': {},
u'beta': {u'fmt': u'2.35', u'raw': 2.35046},
u'beta3Year': {},
u'bookValue': {u'fmt': u'1.31', u'raw': 1.31},
u'category': None,
u'earningsQuarterlyGrowth': {},
u'enterpriseToEbitda': {u'fmt': u'-37.62',
u'raw': -37.618},
u'enterpriseToRevenue': {u'fmt': u'15.86',
u'raw': 15.864},
u'enterpriseValue': {u'fmt': u'4.09B',
u'longFmt': u'4,092,714,240',
u'raw': 4092714240},
u'fiveYearAverageReturn': {},
u'floatShares': {u'fmt': u'119.83M',
u'longFmt': u'119,833,635',
u'raw': 119833635},
u'forwardEps': {u'fmt': u'-1.14', u'raw': -1.14},
u'forwardPE': {u'fmt': u'-31.87',
u'raw': -31.868423},
u'fundFamily': None,
u'fundInceptionDate': {},
u'heldPercentInsiders': {},
u'heldPercentInstitutions': {},
u'lastCapGain': {},
u'lastDividendValue': {},
u'lastFiscalYearEnd': {u'fmt': u'2015-12-31',
u'raw': 1451520000},
u'lastSplitDate': {},
u'lastSplitFactor': None,
u'legalType': None,
u'maxAge': 1,
u'morningStarOverallRating': {},
u'morningStarRiskRating': {},
u'mostRecentQuarter': {u'fmt': u'2016-03-31',
u'raw': 1459382400},
u'netIncomeToCommon': {u'fmt': u'-134.48M',
u'longFmt': u'-134,478,000',
u'raw': -134478000},
u'nextFiscalYearEnd': {u'fmt': u'2017-12-31',
u'raw': 1514678400},
u'pegRatio': {u'fmt': u'-0.76', u'raw': -0.76},
u'priceToBook': {u'fmt': u'27.73',
u'raw': 27.732826},
u'priceToSalesTrailing12Months': {},
u'profitMargins': {u'fmt': u'-52.12%',
u'raw': -0.52124},
u'revenueQuarterlyGrowth': {},
u'sharesOutstanding': {u'fmt': u'120.78M',
u'longFmt': u'120,783,000',
u'raw': 120783000},
u'sharesShort': {u'fmt': u'13.89M',
u'longFmt': u'13,890,400',
u'raw': 13890400},
u'sharesShortPriorMonth': {u'fmt': u'13.03M',
u'longFmt': u'13,032,400',
u'raw': 13032400},
u'shortPercentOfFloat': {u'fmt': u'13.66%',
u'raw': 0.13664},
u'shortRatio': {u'fmt': u'6.66', u'raw': 6.66},
u'threeYearAverageReturn': {},
u'totalAssets': {},
u'trailingEps': {u'fmt': u'-1.12',
u'raw': -1.119},
u'yield': {},
u'ytdReturn': {}},
u'financialData': {u'currentPrice': {u'fmt': u'36.33', u'raw': 36.33},
u'currentRatio': {u'fmt': u'6.14', u'raw': 6.136},
u'debtToEquity': {u'fmt': u'302.79', u'raw': 302.793},
u'earningsGrowth': {},
u'ebitda': {u'fmt': u'-108.8M',
u'longFmt': u'-108,796,000',
u'raw': -108796000},
u'ebitdaMargins': {u'fmt': u'-42.17%',
u'raw': -0.42169997},
u'freeCashflow': {u'fmt': u'15.13M',
u'longFmt': u'15,127,875',
u'raw': 15127875},
u'grossMargins': {u'fmt': u'-30.48%', u'raw': -0.30478},
u'grossProfits': {u'fmt': u'283.7M',
u'longFmt': u'283,703,000',
u'raw': 283703000},
u'maxAge': 86400,
u'numberOfAnalystOpinions': {u'fmt': u'8',
u'longFmt': u'8',
u'raw': 8},
u'operatingCashflow': {u'fmt': u'-11.82M',
u'longFmt': u'-11,817,000',
u'raw': -11817000},
u'operatingMargins': {u'fmt': u'-46.09%',
u'raw': -0.46085998},
u'profitMargins': {u'fmt': u'-52.12%',
u'raw': -0.52124},
u'quickRatio': {u'fmt': u'5.94', u'raw': 5.944},
u'recommendationKey': u'hold',
u'recommendationMean': {u'fmt': u'2.80', u'raw': 2.8},
u'returnOnAssets': {u'fmt': u'-8.12%',
u'raw': -0.08116},
u'returnOnEquity': {u'fmt': u'-61.97%',
u'raw': -0.6197},
u'revenueGrowth': {u'fmt': u'-41.10%', u'raw': -0.411},
u'revenuePerShare': {u'fmt': u'2.15', u'raw': 2.148},
u'targetHighPrice': {u'fmt': u'64.00', u'raw': 64.0},
u'targetLowPrice': {u'fmt': u'17.00', u'raw': 17.0},
u'targetMeanPrice': {u'fmt': u'39.13', u'raw': 39.13},
u'targetMedianPrice': {u'fmt': u'38.00', u'raw': 38.0},
u'totalCash': {u'fmt': u'723.51M',
u'longFmt': u'723,507,008',
u'raw': 723507008},
u'totalCashPerShare': {u'fmt': u'5.99', u'raw': 5.99},
u'totalDebt': {u'fmt': u'478.9M',
u'longFmt': u'478,904,000',
u'raw': 478904000},
u'totalRevenue': {u'fmt': u'257.99M',
u'longFmt': u'257,993,984',
u'raw': 257993984}}}]
答案 1 :(得分:0)
下面是我补充编写的代码。现在我可以顺利地保存动态生成的内容,并用 BeautifulSoup 获取我想要的标签:
from contextlib import closing

from selenium.webdriver import Firefox
from selenium.webdriver.support.ui import WebDriverWait

# closing() calls browser.close() when the block exits, including on error.
with closing(Firefox()) as browser:
    browser.get('https://finance.yahoo.com/quote/IONS?p=IONS')
    # Click the link whose text is "Statistics" to switch to that view.
    button = browser.find_element_by_link_text('Statistics')
    button.click()
    # Explicit wait left disabled by the author:
    #WebDriverWait(browser, timeout=10).until(
    #    lambda x: x.find_element_by_class_name('Fz(s) Fw(500) Ta(end)'))
    # Grab the page source as it stands in the browser after the click.
    page_source = browser.page_source
    print(page_source)

# NOTE(review): BeautifulSoup is not imported in this snippet; presumably it
# comes from the earlier `from bs4 import BeautifulSoup` -- confirm.
yahoo_finance = BeautifulSoup(page_source, 'html.parser')
@nephtes @Padraic Cunningham 感谢你们的提示。