我试图从.csv文件中的股票代码列表中下载Google财经数据。
这是我根据这个网站上的代码改编的类:
import urllib,time,datetime
import csv
class Quote(object):
    """Column-wise container of price bars for one symbol.

    Each bar is stored across seven parallel lists (``date``, ``time``,
    ``open_``, ``high``, ``low``, ``close``, ``volume``); index ``i`` of every
    list belongs to the same bar.
    """

    DATE_FMT = '%Y-%m-%d'
    TIME_FMT = '%H:%M:%S'

    def __init__(self):
        self.symbol = ''
        (self.date, self.time, self.open_, self.high,
         self.low, self.close, self.volume) = ([] for _ in range(7))

    def append(self, dt, open_, high, low, close, volume):
        """Add one bar.

        ``dt`` must provide ``date()`` and ``time()``; prices are converted
        with ``float`` and volume with ``int``.

        Raises:
            ValueError: if a price or the volume cannot be converted. Nothing
                is stored in that case.
        """
        # Convert everything first so a bad value cannot leave the parallel
        # lists with different lengths.
        day, clock = dt.date(), dt.time()
        open_, high, low, close = float(open_), float(high), float(low), float(close)
        volume = int(volume)
        self.date.append(day)
        self.time.append(clock)
        self.open_.append(open_)
        self.high.append(high)
        self.low.append(low)
        self.close.append(close)
        self.volume.append(volume)

    def to_csv(self):
        """Return all bars as CSV text, one line per bar.

        Line layout: ``symbol,date,time,open,high,low,close,volume`` with the
        date/time rendered through ``DATE_FMT``/``TIME_FMT`` and prices with
        two decimals. Returns ``''`` when no bar is stored.
        """
        bars = zip(self.date, self.time, self.open_, self.high,
                   self.low, self.close, self.volume)
        return ''.join(
            "{0},{1},{2},{3:.2f},{4:.2f},{5:.2f},{6:.2f},{7}\n".format(
                self.symbol, day.strftime(self.DATE_FMT),
                clock.strftime(self.TIME_FMT), o, h, l, c, v)
            for day, clock, o, h, l, c, v in bars)

    def append_csv(self, filename):
        """Append the CSV text of all stored bars to ``filename``."""
        with open(filename, 'a') as f:
            f.write(self.to_csv())

    def __repr__(self):
        return self.to_csv()

    @staticmethod
    def _read_symbols(filename):
        """Return the symbols listed one per line in ``filename``.

        Surrounding whitespace (including the line ending) is removed; blank
        lines and the ``codigo`` header line are skipped.
        """
        symbols = []
        with open(filename, 'r') as f:
            for line in f:
                symbol = line.strip()
                # Compare the stripped text: the raw line still carries its
                # newline and would never equal the header word.
                if symbol and symbol != 'codigo':
                    symbols.append(symbol)
        return symbols

    def get_symbols(self, filename):
        """Download quotes for every symbol in ``filename`` into 'data.csv'.

        For each symbol a ``GoogleQuote`` covering 2014-01-01 to 2014-06-20 is
        created and its bars are appended to 'data.csv'.
        """
        for symbol in self._read_symbols(filename):
            print(symbol)
            q = GoogleQuote(symbol, '2014-01-01', '2014-06-20')
            q.append_csv('data.csv')
class GoogleQuote(Quote):
    """Daily quotes downloaded from Google Finance.

    Dates are passed as 'yyyy-mm-dd' strings. The constructor performs the
    download and stores every parsable row through ``Quote.append``.
    """

    # Cell contents that mean "no value" in a downloaded row.
    _MISSING = ('', '-')

    def __init__(self, symbol, start_date, end_date=None):
        """Download the bars of ``symbol`` between ``start_date`` and ``end_date``.

        Args:
            symbol: ticker; surrounding whitespace is removed and the text is
                upper-cased.
            start_date: first day, 'yyyy-mm-dd'.
            end_date: last day, 'yyyy-mm-dd'; defaults to today, evaluated at
                call time rather than once at import.
        """
        super(GoogleQuote, self).__init__()
        # Symbols read from a text file usually still carry a line ending,
        # which would corrupt the request URL.
        self.symbol = symbol.strip().upper()
        if end_date is None:
            end_date = datetime.date.today().isoformat()
        start = self._parse_date(start_date)
        end = self._parse_date(end_date)
        url_string = "http://www.google.com/finance/historical?q={0}".format(self.symbol)
        url_string += "&startdate={0}&enddate={1}&output=csv".format(
            start.strftime('%b %d, %Y'), end.strftime('%b %d, %Y'))
        response = urllib.urlopen(url_string)
        try:
            lines = response.readlines()
        finally:
            response.close()
        # The first line is the column header. The remaining lines are walked
        # backwards (presumably the service lists newest bars first) so bars
        # are stored in the reverse of the download order.
        for line in reversed(lines[1:]):
            self._append_row(line)

    @staticmethod
    def _parse_date(text):
        """Turn the leading 'yyyy-mm-dd' of ``text`` into a ``datetime.date``."""
        return datetime.date(int(text[0:4]), int(text[5:7]), int(text[8:10]))

    def _append_row(self, line):
        """Parse one 'date,open,high,low,close,volume' line and store it.

        Missing open/high/low cells ('-' or empty) are filled with the close
        price and a missing volume becomes 0, so a bar is kept whenever its
        date and close are usable. Rows that still cannot be parsed are
        reported with an "error " message and skipped.
        """
        cells = [cell.strip() for cell in line.split(',')]
        if cells == ['']:
            return  # blank line, nothing to report
        try:
            ds, open_, high, low, close, volume = cells[:6]
            dt = datetime.datetime.strptime(ds, '%d-%b-%y')
            close = float(close)
            open_, high, low = [
                close if cell in self._MISSING else float(cell)
                for cell in (open_, high, low)]
            volume = 0 if volume in self._MISSING else int(volume)
        except ValueError:
            # Too few columns, an unparsable date, or a non-numeric cell.
            print("error " + line.rstrip())
            return
        self.append(dt, open_, high, low, close, volume)
if __name__ == '__main__':
    # Script entry point: a plain Quote object is only used as the driver that
    # walks the symbol list stored in 'list.csv'.
    Quote().get_symbols('list.csv')
但是,对于某些股票(例如BIOM3),代码不会返回所有数据,某些字段返回的是' - '。在这些情况下该如何处理字段的拆分? 最后,脚本运行到某个时刻会停止下载数据,而且停止时不会输出任何消息。我该如何处理这个问题?
答案 0 :(得分:2)
它应该可以正常工作,但请注意股票代码应写成: BVMF:ABRE11
In [250]:
# pandas' io.data module, aliased as `web`; it supplies the DataReader used below.
import pandas.io.data as web
import datetime
# Date range passed to the reader.
start = datetime.datetime(2010, 1, 1)
end = datetime.datetime(2013, 1, 27)
# Ask the 'google' source for "BVMF:ABRE11" over that range; note the ticker
# carries the "BVMF:" prefix rather than the bare code.
df=web.DataReader("BVMF:ABRE11", 'google', start, end)
# Print the first 10 rows of the result.
print df.head(10)
Open High Low Close Volume
?Date
2011-07-26 19.79 19.79 18.30 18.50 1843700
2011-07-27 18.45 18.60 17.65 17.89 1475100
2011-07-28 18.00 18.50 18.00 18.30 441700
2011-07-29 18.30 18.84 18.20 18.70 392800
2011-08-01 18.29 19.50 18.29 18.86 217800
2011-08-02 18.86 18.86 18.60 18.80 154600
2011-08-03 18.90 18.90 18.00 18.00 168700
2011-08-04 17.50 17.85 16.50 16.90 238700
2011-08-05 17.00 17.00 15.63 16.00 253000
2011-08-08 15.50 15.96 14.35 14.50 224300
[10 rows x 5 columns]
In [251]:
# Same request for "BVMF:BIOM3", reusing `web`, `start` and `end` from the
# earlier cell of this session.
df=web.DataReader("BVMF:BIOM3", 'google', start, end)
# Print the first 10 rows of the result.
print df.head(10)
Open High Low Close Volume
?Date
2010-01-04 2.90 2.90 2.90 2.90 0
2010-01-05 3.00 3.00 3.00 3.00 0
2010-01-06 3.01 3.01 3.01 3.01 0
2010-01-07 3.01 3.09 3.01 3.09 2000
2010-01-08 3.01 3.01 3.01 3.01 0
2010-01-11 3.00 3.00 3.00 3.00 0
2010-01-12 3.00 3.00 3.00 3.00 0
2010-01-13 3.00 3.10 3.00 3.00 7000
2010-01-14 3.00 3.00 3.00 3.00 0
2010-01-15 3.00 3.00 3.00 3.00 1000
[10 rows x 5 columns]