I'm trying to request several links from Yahoo Finance and return the Income Statement, Balance Sheet, and Cash Flow as separate arrays. I find myself writing a lot of for loops. I'd like to know whether there is a better version of the code below:
import urllib.request
from bs4 import BeautifulSoup

def scrapper(symbol):
    htmls = []
    soup = []
    gen_table = []
    IS = "http://finance.yahoo.com/q/is?s={}+Income+Statement&annual".format(symbol)
    BS = "http://finance.yahoo.com/q/is?s={}+Balance+Sheet&annual".format(symbol)
    CF = "http://finance.yahoo.com/q/is?s={}+Cash+Flow&annual".format(symbol)
    urls = [IS, BS, CF]
    # read each link in urls
    for url in urls:
        with urllib.request.urlopen(url) as response:
            htmls.append(response.read())
    # parse data with BeautifulSoup
    for html in htmls:
        soup.append(BeautifulSoup(html))
    # store income statement, balance sheet and cash flow into soup
    for s in soup:
        gen_table.append(s.find_all("table", class_="yfnc_tabledata1"))
    return gen_table
Answer 0 (score: 1)

I would probably do something like this:
from bs4 import BeautifulSoup
import urllib.request

def fetch_table(symbol, table):
    # Build the URL, download the page and return all matching data tables.
    url = "http://finance.yahoo.com/q/is?s={}+{}&annual".format(symbol, table)
    with urllib.request.urlopen(url) as response:
        result = response.read()
    result = BeautifulSoup(result)
    result = result.find_all("table", class_="yfnc_tabledata1")
    return result

def scrapper(symbol):
    return [fetch_table(symbol, table)
            for table in (
                "Income+Statement",
                "Balance+Sheet",
                "Cash+Flow")]

print(scrapper("X"))
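A possible follow-up, not part of the original answer: the three downloads are independent of each other, so they could also be issued in parallel. Below is a minimal sketch using the standard library's concurrent.futures on top of the fetch_table helper above; scrapper_concurrent is a hypothetical name, and the table labels are carried over unchanged from the original URLs.

from concurrent.futures import ThreadPoolExecutor

def scrapper_concurrent(symbol):
    # Fetch the three statements in separate threads; pool.map preserves
    # the order of the table names, so the result still comes back as
    # [income statement, balance sheet, cash flow].
    tables = ("Income+Statement", "Balance+Sheet", "Cash+Flow")
    with ThreadPoolExecutor(max_workers=len(tables)) as pool:
        return list(pool.map(lambda t: fetch_table(symbol, t), tables))

Note also that all three URLs in the question point at the same q/is path and only vary the query text; if the other statements actually live under different paths, fetch_table would need the path as a parameter as well.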