从 Google 财经下载股票数据

时间:2014-06-24 02:09:59

标签: python pandas web-scraping

我试图根据 .csv 文件中的股票代码列表,从 Google 财经下载数据。

这是我根据这个网站(site)上的代码改编的类:

import urllib,time,datetime
import csv

class Quote(object):
  """Price bars for a single symbol, kept as parallel lists.

  Each call to append() adds one entry to date, time, open_, high, low,
  close and volume, so index i across those lists describes bar i.
  """

  DATE_FMT = '%Y-%m-%d'
  TIME_FMT = '%H:%M:%S'

  def __init__(self):
    self.symbol = ''
    (self.date, self.time, self.open_, self.high,
     self.low, self.close, self.volume) = ([] for _ in range(7))

  def append(self, dt, open_, high, low, close, volume):
    """Add one bar.

    dt is a datetime.datetime; prices are converted with float() and
    volume with int(), so numeric strings are accepted.
    Raises ValueError if a price or volume cannot be converted.
    """
    self.date.append(dt.date())
    self.time.append(dt.time())
    self.open_.append(float(open_))
    self.high.append(float(high))
    self.low.append(float(low))
    self.close.append(float(close))
    self.volume.append(int(volume))

  def to_csv(self):
    """Return every bar as CSV text, one line per bar.

    Line layout: symbol,date,time,open,high,low,close,volume with the date
    and time rendered through DATE_FMT / TIME_FMT and prices to 2 decimals.
    An empty quote yields ''.
    """
    bars = zip(self.date, self.time, self.open_, self.high,
               self.low, self.close, self.volume)
    return ''.join(
        "{0},{1},{2},{3:.2f},{4:.2f},{5:.2f},{6:.2f},{7}\n".format(
            self.symbol, day.strftime(self.DATE_FMT),
            clock.strftime(self.TIME_FMT), open_, high, low, close, volume)
        for day, clock, open_, high, low, close, volume in bars)

  def append_csv(self, filename):
    """Append the CSV text of this quote to *filename*."""
    with open(filename, 'a') as f:
      f.write(self.to_csv())

  def __repr__(self):
    return self.to_csv()

  def get_symbols(self, filename, start_date='2014-01-01',
                  end_date='2014-06-20', out_filename='data.csv'):
    """Download quotes for every symbol listed in *filename*.

    *filename* holds one symbol per line; blank lines and the 'codigo'
    header line are skipped. Each symbol's bars between start_date and
    end_date ('yyyy-mm-dd') are appended to *out_filename*.
    """
    with open(filename, 'r') as symbols:
      for line in symbols:
        # Lines read from a file keep their trailing newline; strip it so
        # the header comparison works and the symbol is clean.
        symbol = line.strip()
        if not symbol or symbol.lower() == 'codigo':
          continue
        print(symbol)
        try:
          q = GoogleQuote(symbol, start_date, end_date)
        except IOError as exc:
          # One failed download should not abort the rest of the list.
          print("error downloading {0}: {1}".format(symbol, exc))
          continue
        q.append_csv(out_filename)


class GoogleQuote(Quote):
  ''' Daily quotes from Google. Date format='yyyy-mm-dd'

  The constructor downloads the historical CSV for one symbol and stores
  every row through Quote.append(). Rows that cannot be parsed are
  reported on stdout and skipped instead of aborting the download.
  '''

  # Date layout of the first CSV column, e.g. '20-Jun-14'.
  GOOGLE_DATE_FMT = '%d-%b-%y'

  def __init__(self, symbol, start_date, end_date=None):
    """Download and store the daily bars of *symbol*.

    symbol      ticker as Google expects it; surrounding whitespace is dropped
    start_date  first day, 'yyyy-mm-dd'
    end_date    last day, 'yyyy-mm-dd'; None (default) means today
    """
    super(GoogleQuote, self).__init__()
    self.symbol = symbol.strip().upper()
    # Resolve "today" per call; a default computed in the signature would be
    # frozen at import time.
    if end_date is None:
      end_date = datetime.date.today().isoformat()
    start = datetime.datetime.strptime(start_date, self.DATE_FMT).date()
    end = datetime.datetime.strptime(end_date, self.DATE_FMT).date()
    url_string = "http://www.google.com/finance/historical?q={0}".format(self.symbol)
    url_string += "&startdate={0}&enddate={1}&output=csv".format(
                      start.strftime('%b %d, %Y'), end.strftime('%b %d, %Y'))
    lines = urllib.urlopen(url_string).readlines()
    self._load(lines)

  def _load(self, lines):
    """Append every data row of *lines*; the first line is the CSV header."""
    # Drop the header, then walk the rows in reverse of the download order
    # (presumably newest-first from Google, giving oldest-first storage).
    for raw in reversed(lines[1:]):
      row = raw.strip()
      if not row:
        continue
      try:
        ds, open_, high, low, close, volume = row.split(',')
        dt = datetime.datetime.strptime(ds, self.GOOGLE_DATE_FMT)
        # Convert everything before calling append() so a bad field can
        # never leave the parallel lists with different lengths.
        bar = (self._price(open_), self._price(high), self._price(low),
               self._price(close), self._volume(volume))
      except ValueError:
        # Wrong column count, unparsable date or non-numeric field.
        print("error " + row)
        continue
      self.append(dt, *bar)

  @staticmethod
  def _price(field):
    """Return a price as float; an empty or '-' placeholder becomes NaN."""
    field = field.strip()
    if field in ('', '-'):
      return float('nan')
    return float(field)

  @staticmethod
  def _volume(field):
    """Return a volume as int; an empty or '-' placeholder becomes 0."""
    field = field.strip()
    if field in ('', '-'):
      return 0
    return int(field)


if __name__ == '__main__':
  # Script entry point: fetch quotes for every symbol listed in list.csv
  # using a generic quote object.
  Quote().get_symbols('list.csv')

但是,对于某些股票(例如 BIOM3),代码不会返回全部数据,有些字段返回的是 ' - '。在这种情况下,应该如何处理 split(拆分)?最后,脚本运行到某个时刻会停止下载数据,而且停止时不返回任何消息。我该如何处理这个问题?

1 个答案:

答案 0 :(得分:2)

这样应该可行,但请注意股票代码应写为: BVMF:ABRE11

In [250]:

# Fetch the quotes through pandas' remote-data reader instead of parsing the
# CSV by hand; the result is a DataFrame.
# NOTE(review): later pandas releases moved pandas.io.data into the separate
# pandas-datareader package -- confirm against the installed version.
import pandas.io.data as web
import datetime
# Date range to request.
start = datetime.datetime(2010, 1, 1)
end = datetime.datetime(2013, 1, 27)
# The ticker is written with the "BVMF:" prefix, as the answer text requires.
df=web.DataReader("BVMF:ABRE11", 'google', start, end)
print df.head(10)
             Open   High    Low  Close   Volume
?Date                                          
2011-07-26  19.79  19.79  18.30  18.50  1843700
2011-07-27  18.45  18.60  17.65  17.89  1475100
2011-07-28  18.00  18.50  18.00  18.30   441700
2011-07-29  18.30  18.84  18.20  18.70   392800
2011-08-01  18.29  19.50  18.29  18.86   217800
2011-08-02  18.86  18.86  18.60  18.80   154600
2011-08-03  18.90  18.90  18.00  18.00   168700
2011-08-04  17.50  17.85  16.50  16.90   238700
2011-08-05  17.00  17.00  15.63  16.00   253000
2011-08-08  15.50  15.96  14.35  14.50   224300

[10 rows x 5 columns]

In [251]:

# Same request for BIOM3, the symbol the question reported as incomplete;
# reuses `web`, `start` and `end` from the earlier cell.
df=web.DataReader("BVMF:BIOM3", 'google', start, end)
print df.head(10)
            Open  High   Low  Close  Volume
?Date                                      
2010-01-04  2.90  2.90  2.90   2.90       0
2010-01-05  3.00  3.00  3.00   3.00       0
2010-01-06  3.01  3.01  3.01   3.01       0
2010-01-07  3.01  3.09  3.01   3.09    2000
2010-01-08  3.01  3.01  3.01   3.01       0
2010-01-11  3.00  3.00  3.00   3.00       0
2010-01-12  3.00  3.00  3.00   3.00       0
2010-01-13  3.00  3.10  3.00   3.00    7000
2010-01-14  3.00  3.00  3.00   3.00       0
2010-01-15  3.00  3.00  3.00   3.00    1000

[10 rows x 5 columns]