抓取雅虎财经季度数据

时间:2019-04-08 18:39:14

标签: python python-3.x

下面是我用来获取年度数据的代码。由于季度数据不是一个单独的链接,而是通过页面上的一个按钮切换显示,我不知道该如何抓取它。我已经折腾了好几天,最后只好来这里求助。

最终目标是生成包含资产负债表、现金流量表等内容的 Excel 输出,但我需要的是按季度的数据。

欢迎任何帮助。谢谢

import io

import lxml
from lxml import html
import numpy as np
import pandas as pd
import requests
import xlrd

def scrape_table(url, timeout=30):
    """Download *url* and return its single HTML table as a DataFrame.

    The table is parsed with pandas, re-indexed on its first column and
    transposed, so every original table row becomes a column. Cells holding
    only ``'-'`` are replaced with ``'0'``. The first resulting column is
    parsed as datetimes and renamed ``Date``; every remaining column is cast
    to ``float64``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The transposed DataFrame described above.

    Raises:
        requests.RequestException: If the request times out or the server
            answers with an error status.
        ValueError: If the page does not contain exactly one ``<table>``.
    """
    # Without a timeout a stalled connection would block the caller forever.
    page = requests.get(url, timeout=timeout)
    page.raise_for_status()

    # page.content (bytes) rather than page.text, because html.fromstring
    # implicitly expects bytes as input.
    tree = html.fromstring(page.content)

    # XPath locates elements in structured documents such as HTML or XML.
    tables = tree.xpath('//table')
    # Explicit check instead of ``assert``, which is stripped under ``-O``.
    if len(tables) != 1:
        raise ValueError(
            'expected exactly one <table> at %s, found %d' % (url, len(tables))
        )

    # Newer pandas deprecates passing raw markup to read_html, so hand it a
    # file-like object instead.
    markup = lxml.etree.tostring(tables[0], method='html')
    df = pd.read_html(io.BytesIO(markup))[0]

    df = df.set_index(0).transpose()
    df = df.replace('-', '0')

    # The first column should be a date.
    first_column = df.columns[0]
    df[first_column] = pd.to_datetime(df[first_column])

    # Assign the labels directly: set_axis(..., inplace=False) raises
    # TypeError on pandas >= 2.0, where the ``inplace`` keyword was removed.
    df.columns = ['Date', *df.columns[1:]]

    numeric_columns = list(df.columns[1:])
    df[numeric_columns] = df[numeric_columns].astype(np.float64)

    return df

# Workbook listing the public companies to scrape. Tickers are read from the
# second column (index 1) of the first sheet; row 0 is skipped.
loc = r"F:\KateLaptop2019\Work\DataAnalysis\listpubliccompanies.xlsx"
wb = xlrd.open_workbook(loc)
sheet = wb.sheet_by_index(0)

# Surrounding whitespace is stripped from each ticker cell.
companies = [
    sheet.cell_value(row, 1).strip()
    for row in range(1, sheet.nrows)
]


def annual_financials():
    """Scrape each company's Yahoo Finance balance sheet into an .xlsx file.

    Iterates over the module-level ``companies`` list, builds the Yahoo
    Finance balance-sheet URL for each ticker, scrapes it with
    ``scrape_table`` and writes the resulting DataFrame to one workbook per
    ticker. A failure for one ticker is reported on stdout and does not stop
    the remaining tickers from being processed.
    """
    for item in companies:
        balance_sheet_url = 'https://finance.yahoo.com/quote/' + item + '/balance-sheet?p=' + item
        download_destination = (r'F:\KateLaptop2019\Work\DataAnalysis\OilCompanyResearch\CompanyFinancials\BalanceSheet\\' + item + ".xlsx")
        try:
            df_balance_sheet = scrape_table(balance_sheet_url)
            df_balance_sheet.to_excel(download_destination)
        except Exception as exc:
            # Best-effort batch: report the real cause and move on.
            # ``Exception`` (not a bare ``except``) lets Ctrl-C and
            # SystemExit still stop the run.
            print(item, "failed:", repr(exc))


annual_financials()

0 个答案:

没有答案