Python - 使用Pandas从Google财经中获取数据

时间:2018-02-20 22:50:49

标签: python python-3.x pandas google-finance pandas-datareader

我尝试使用Pandas和Pandas Datareader从Google财经中提取数据。 这是我的代码:

#Importing libraries needed for pulls from Google
from pandas_datareader import data
import pandas as pd
import datetime
from datetime import date

#Define the instruments to download.  In this case: Apple, Microsoft, and
#the S&P500 index
#NOTE: Apple's ticker symbol is 'AAPL'; 'APPL' is left exactly as posted.
tickers = ['APPL', 'MSFT', 'SPY']
start_date = datetime.datetime(2017, 12, 1)
end_date = datetime.datetime(2017, 12, 31)

#Use pandas_datareader.data.DataReader to load the desired data
panel_data = data.DataReader('SPY', 'google', start_date, end_date)
#Getting just the closing prices.  Plain key selection is used instead of the
#removed .ix indexer: it picks the 'Close' column of a DataFrame (single
#symbol) as well as the 'Close' item of a Panel (list of symbols), whereas
#.ix['Close'] on a DataFrame would look for a row labelled 'Close'.
close = panel_data['Close']

#Getting all weekdays within date range.
all_weekdays = pd.date_range(start=start_date, end=end_date, freq='B')

#How do we align the existing prices in close with our new set of dates?
#All we need to do is reindex close using all_weekdays as the new index.
close = close.reindex(all_weekdays)

close.head(10)

这是控制台输出:

runfile('C:/Users/kjohn_000/.spyder-py3/temp.py', wdir='C:/Users/kjohn_000/.spyder-py3')
C:\Users\kjohn_000\Anaconda3\lib\site-packages\pandas_datareader\base.py:201: SymbolWarning: Failed to read symbol: 
'APPL', replacing with NaN.
  warnings.warn(msg.format(sym), SymbolWarning)
C:\Users\kjohn_000\Anaconda3\lib\site-
packages\pandas_datareader\base.py:201: SymbolWarning: Failed to read 
symbol: 'MSFT', replacing with NaN.
  warnings.warn(msg.format(sym), SymbolWarning)
C:\Users\kjohn_000\Anaconda3\lib\site-packages\pandas_datareader\base.py:201: SymbolWarning: Failed to read symbol: 
'SPY', replacing with NaN.
  warnings.warn(msg.format(sym), SymbolWarning)
Traceback (most recent call last):

  File "<ipython-input-2-0ddd75de0396>", line 1, in <module>
    runfile('C:/Users/kjohn_000/.spyder-py3/temp.py', 
wdir='C:/Users/kjohn_000/.spyder-py3')

  File "C:\Users\kjohn_000\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 705, in runfile
    execfile(filename, namespace)

  File "C:\Users\kjohn_000\Anaconda3\lib\site-
packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)

  File "C:/Users/kjohn_000/.spyder-py3/temp.py", line 14, in <module>
    panel_data = data.DataReader(tickers, dataSource, start_date, end_date)

  File "C:\Users\kjohn_000\Anaconda3\lib\site-packages\pandas_datareader\data.py", line 137, in DataReader
session=session).read()

  File "C:\Users\kjohn_000\Anaconda3\lib\site-
packages\pandas_datareader\base.py", line 186, in read
    df = self._dl_mult_symbols(self.symbols)

  File "C:\Users\kjohn_000\Anaconda3\lib\site-
packages\pandas_datareader\base.py", line 206, in _dl_mult_symbols
    raise RemoteDataError(msg.format(self.__class__.__name__))

RemoteDataError: No data fetched using 'GoogleDailyReader'

为什么Pandas Datareader无法读取“tickers”列表中的股票代码?我已经花了几个小时寻找答案,但很多回答都是针对雅虎API的问题,其余的回答要么是针对另一种语言,要么超出了我目前的编程水平(我对Python还比较陌生)。提前感谢您的帮助和反馈。

2 个答案:

答案 0 :(得分:1)

这适用于Python 3.6.1

from pandas_datareader import data
import fix_yahoo_finance as yf
# Patch pandas_datareader so that get_data_yahoo is served by fix_yahoo_finance.
yf.pdr_override()

symbol = 'AMZN'
start_date = '2010-01-01'
end_date = '2016-01-01'
# The prices are fetched from Yahoo Finance; no Google data source is involved.
df = data.get_data_yahoo(symbol, start_date, end_date)
print(df)
df.head()

这也适合我。

from urllib.request import urlopen
from bs4 import BeautifulSoup as bs

def get_historical_data(name, number_of_days):
    """Scrape daily 'Open' prices for a ticker from its Yahoo Finance history page.

    Args:
        name: Ticker symbol; it is inserted into the history-page URL.
        number_of_days: Number of leading table rows to keep in the result.

    Returns:
        A list of ``{'Date': str, 'Open': float}`` dicts in page order,
        sliced to the first ``number_of_days`` entries.
    """
    url = "https://finance.yahoo.com/quote/" + name + "/history/"
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(url) as response:
        html = response.read()
    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers happen to be installed.
    rows = bs(html, 'html.parser').find_all('table')[0].tbody.find_all('tr')

    data = []
    for each_row in rows:
        divs = each_row.find_all('td')
        if divs[1].span.text != 'Dividend':  # Ignore this row in the table
            # Only the 'Open' price is kept; for other values, play with divs[1 - 5]
            data.append({'Date': divs[0].span.text, 'Open': float(divs[1].span.text.replace(',', ''))})

    return data[:number_of_days]

# Quick check: print the first 25 scraped rows for GOOGL, one per line.
for record in get_historical_data('googl', 25):
    print(record)

答案 1 :(得分:0)

这个方法没有使用Google,不过如果您使用Python的YahooFinancials模块,就可以轻松地将金融数据加载到pandas中。YahooFinancials通过解析相应Yahoo Finance页面中的数据存储(datastore)对象来获取财务数据,因此它速度很快、构建良好,并且不像网页抓取器那样依赖已停用的旧API或webdriver。数据以JSON格式返回。

$ pip install yahoofinancials

用法示例:

from yahoofinancials import YahooFinancials
import pandas as pd

# Instruments to download: three stock tickers plus the market index symbol
ticker, ticker2, ticker3 = 'AAPL', 'MSFT', 'INTC'
index = '^NDX'

# Sampling frequency and date window of the requested price history
freq = 'daily'
start_date, end_date = '2012-10-01', '2017-10-01'


# Function to clean data extracts
def clean_stock_data(stock_data_list):
    """Return the records of *stock_data_list* that have no 'type' key.

    Records carrying a 'type' key are dropped; all remaining records are
    returned, in their original order, in a new list.
    """
    return [entry for entry in stock_data_list if 'type' not in entry]

# One YahooFinancials client per instrument, created in ticker order
aapl_financials, mfst_financials, intl_financials, index_financials = (
    YahooFinancials(symbol) for symbol in (ticker, ticker2, ticker3, index)
)


def _price_history(client, symbol):
    """Return the 'prices' records that *client* reports for *symbol*."""
    return client.get_historical_stock_data(start_date, end_date, freq)[symbol]['prices']


# The three stock histories go through clean_stock_data (drops records that
# carry a 'type' key); the index history is kept exactly as returned.
daily_aapl_data = clean_stock_data(_price_history(aapl_financials, ticker))
daily_msft_data = clean_stock_data(_price_history(mfst_financials, ticker2))
daily_intl_data = clean_stock_data(_price_history(intl_financials, ticker3))
daily_index_data = _price_history(index_financials, index)
stock_hist_data_list = [
    {'NDX': daily_index_data},
    {'AAPL': daily_aapl_data},
    {'MSFT': daily_msft_data},
    {'INTL': daily_intl_data},
]


# Function to construct data frame based on a stock and its market index
def build_data_frame(data_list1, data_list2, data_list3, data_list4):
    """Build a DataFrame of closing prices for the index and three stocks.

    The rows are driven by the dated price records of ``data_list2``. The
    other lists are matched on ``'formatted_date'`` rather than on list
    position, so a missing day or an extra event record in one list cannot
    shift another column's prices onto the wrong date.

    Args:
        data_list1: Price records for the 'NDX' column.
        data_list2: Price records for the 'AAPL' column; defines the dates.
        data_list3: Price records for the 'MSFT' column.
        data_list4: Price records for the 'INTL' column.

        Each price record is a dict with 'formatted_date' and 'close' keys.
        Records that contain a 'type' key are ignored in every list.

    Returns:
        A DataFrame indexed by date in ascending order with the columns
        'NDX', 'AAPL', 'MSFT' and 'INTL'. A date that is absent from one
        of the lists yields NaN in that column.
    """
    columns = ['NDX', 'AAPL', 'MSFT', 'INTL']
    sources = (data_list1, data_list2, data_list3, data_list4)
    # Per column: formatted_date -> close, skipping records that have a 'type' key.
    closes = {
        column: {rec['formatted_date']: rec['close'] for rec in records if 'type' not in rec}
        for column, records in zip(columns, sources)
    }
    missing = float('nan')
    data_dict = {
        day: {column: closes[column].get(day, missing) for column in columns}
        for day in closes['AAPL']
    }
    tseries = pd.to_datetime(list(data_dict.keys()))
    df = pd.DataFrame(data=list(data_dict.values()), index=tseries,
                      columns=columns).sort_index()
    return df