我正在运行以下程序来提取股票信息:
import datetime
import pandas as pd
from pandas import DataFrame
from pandas.io.data import DataReader
# Tickers whose daily history is pulled from the "yahoo" data source.
symbols_list = ['AAPL', 'TSLA', 'YHOO', 'GOOG', 'MSFT', 'ALTR', 'WDC', 'KLAC']

# Collect one frame per ticker, each tagged with the ticker it belongs to,
# so the rows stay identifiable once everything is stacked together.
symbols = []
for ticker in symbols_list:
    r = DataReader(ticker, "yahoo",
                   start=datetime.datetime(2015, 4, 17))
    r['Symbol'] = ticker
    symbols.append(r)

# Stack the per-ticker frames into a single table.
df = pd.concat(symbols)

# Keep only the columns that are written to the report.
wanted_columns = ['Symbol', 'Open', 'High', 'Low', 'Adj Close', 'Volume']
cell = df[wanted_columns]

# Turn the index back into a regular column so it can be used as a sort key,
# then order by Symbol ascending and Date descending.
ordered = cell.reset_index().sort(['Symbol', 'Date'], ascending=[True, False])

# Symbol becomes the index (and so the first CSV column); dates are written
# as dd/mm/YYYY.
ordered.set_index('Symbol').to_csv('stock.csv', date_format='%d/%m/%Y')
这段代码运行正常。但是当我把开始日期改为今天(即 datetime.datetime(2015, 4, 20))时,程序就会报错。我也试过指定结束日期,但没有用。以下是我得到的错误:
UnboundLocalError Traceback (most recent call last)
<ipython-input-38-a05c721d551a> in <module>()
8 for ticker in symbols_list:
9 r = DataReader(ticker, "yahoo",
---> 10 start=datetime.datetime(2015, 4, 20))
11 # add a symbol column
12 r['Symbol'] = ticker
/usr/local/lib/python2.7/site-packages/pandas/io/data.pyc in DataReader(name, data_source, start, end, retry_count, pause)
75 return get_data_yahoo(symbols=name, start=start, end=end,
76 adjust_price=False, chunksize=25,
---> 77 retry_count=retry_count, pause=pause)
78 elif data_source == "google":
79 return get_data_google(symbols=name, start=start, end=end,
/usr/local/lib/python2.7/site-packages/pandas/io/data.pyc in get_data_yahoo(symbols, start, end, retry_count, pause, adjust_price, ret_index, chunksize, interval)
418 raise ValueError("Invalid interval: valid values are 'd', 'w', 'm' and 'v'")
419 return _get_data_from(symbols, start, end, interval, retry_count, pause,
--> 420 adjust_price, ret_index, chunksize, 'yahoo')
421
422
/usr/local/lib/python2.7/site-packages/pandas/io/data.pyc in _get_data_from(symbols, start, end, interval, retry_count, pause, adjust_price, ret_index, chunksize, source)
359 # If a single symbol, (e.g., 'GOOG')
360 if isinstance(symbols, (compat.string_types, int)):
--> 361 hist_data = src_fn(symbols, start, end, interval, retry_count, pause)
362 # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT'])
363 elif isinstance(symbols, DataFrame):
/usr/local/lib/python2.7/site-packages/pandas/io/data.pyc in _get_hist_yahoo(sym, start, end, interval, retry_count, pause)
206 '&g=%s' % interval +
207 '&ignore=.csv')
--> 208 return _retry_read_url(url, retry_count, pause, 'Yahoo!')
209
210
/usr/local/lib/python2.7/site-packages/pandas/io/data.pyc in _retry_read_url(url, retry_count, pause, name)
175 #Get rid of unicode characters in index name.
176 try:
--> 177 rs.index.name = rs.index.name.decode('unicode_escape').encode('ascii', 'ignore')
178 except AttributeError:
179 #Python 3 string has no decode method.
UnboundLocalError: local variable 'rs' referenced before assignment
答案 0 :(得分:0)
将@JohnE的建议汇总在一起,下面的代码似乎可以完成这项工作:
import pandas as pd
# Download one day of intraday quotes per ticker from Yahoo's chart API and
# stack them into a single table with a 'ticker' column.
symbols_list = ['AAPL', 'TSLA', 'YHOO', 'GOOG', 'MSFT', 'ALTR', 'WDC', 'KLAC']
result = []
for ticker in symbols_list:
    # The symbol is lower-cased before being inserted into the URL;
    # range=1d is the range requested from the endpoint.
    url = 'http://chartapi.finance.yahoo.com/instrument/1.0/%s/chartdata;type=quote;range=1d/csv' % ticker.lower()
    # Skip the first 17 lines of the response; the column labels read_csv
    # infers from what follows are replaced on the next line.
    # NOTE(review): the skip count is hard-coded -- confirm the preamble has
    # the same length for every symbol.
    data = pd.read_csv(url, skiprows=17)
    # The sixth field is the traded volume, not a second 'close'. Distinct
    # labels also keep data['close'] a single column instead of two.
    data.columns = ['timestamp', 'close', 'high', 'low', 'open', 'volume']
    data['ticker'] = ticker
    result.append(data)
# Left as a bare expression so an interactive session displays the combined
# frame; each ticker keeps its own 0-based row index.
pd.concat(result)
结果如下:
timestamp close high low open close ticker
0 1429536719 125.5500 125.5700 125.4170 125.5100 183600 AAPL
1 1429536772 125.5900 125.6399 125.4600 125.5200 215000 AAPL
2 1429536835 125.7500 125.8000 125.5600 125.5901 348500 AAPL
...
367 1429559941 58.5700 58.5800 58.5400 58.5800 119100 KLAC
368 1429559946 58.5700 58.5700 58.5700 58.5700 0 KLAC
369 1429560000 58.5600 58.5600 58.5600 58.5600 0 KLAC