我使用的是python v3.6。对于我正在关注的youtube教程。出于某种原因,当我运行此代码时,我看到csv文件已保存(恰好是47个文件)但不是全部。有人可以指点我做错了什么。
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
from matplotlib.finance import candlestick_ohlc
import matplotlib.dates as mdates
import pandas as pd
import pandas_datareader.data as web
import bs4 as bs
import pickle
import requests
import os
import csv
import numpy as np
def tsx_tickers():
resp = requests.get('http://web.tmxmoney.com/indices.php?section=tsx&index=%5ETSX')
soup = bs.BeautifulSoup(resp.text, "lxml")
table = soup.find('table', {'class': 'indices-table'})
tickers = []
for row in table.findAll('tr')[1:]:
ticker = row.findAll('td')[1].text
tickers.append(ticker.replace(".","-") + ".TO")
with open("tsxticker.pickle", "wb") as f:
pickle.dump(tickers, f)
print(tickers)
return tickers
def get_data_from_yahoo(reload_tsx = False):
if reload_tsx:
tickers = tsx_tickers()
else:
with open("tsxticker.pickle", "rb") as f:
tickers = pickle.load(f)
if not os.path.exists('stock_dfs'):
os.makedirs('stock_dfs')
start = dt.datetime(2000, 1, 1)
end = dt.datetime(2016, 12, 13)
for ticker in tickers:
if not os.path.exists('stock_dfs/{}.csv'.format(ticker)):
time.sleep(2)
df = web.DataReader(ticker, 'yahoo', start, end)
df.to_csv('stock_dfs/{}.csv'.format(ticker))
else:
print('Already have {}'.format(ticker))
我一直收到此错误
Traceback (most recent call last):
File "C:\Users\X\Desktop\xfile\sentdex.py", line 77, in <module>
get_data_from_yahoo()
File "C:\Users\X\Desktop\xfile\sentdex.py", line 72, in get_data_from_yahoo
df = web.DataReader(ticker, 'yahoo', start, end)
File "C:\Users\X\AppData\Local\Programs\Python\Python36-32\lib\site-packages\pandas_datareader\data.py", line 116, in DataReader
session=session).read()
File "C:\Users\X\AppData\Local\Programs\Python\Python36-32\lib\site-packages\pandas_datareader\yahoo\daily.py", line 76, in read
df = super(YahooDailyReader, self).read()
File "C:\Users\X\AppData\Local\Programs\Python\Python36-32\lib\site-packages\pandas_datareader\base.py", line 155, in read
df = self._read_one_data(self.url, params=self._get_params(self.symbols))
File "C:\Users\X\AppData\Local\Programs\Python\Python36-32\lib\site-packages\pandas_datareader\base.py", line 74, in _read_one_data
out = self._read_url_as_StringIO(url, params=params)
File "C:\Users\X\AppData\Local\Programs\Python\Python36-32\lib\site-packages\pandas_datareader\base.py", line 85, in _read_url_as_StringIO
response = self._get_response(url, params=params)
File "C:\Users\X\AppData\Local\Programs\Python\Python36-32\lib\site-packages\pandas_datareader\base.py", line 120, in _get_response
raise RemoteDataError('Unable to read URL: {0}'.format(url))
pandas_datareader._utils.RemoteDataError: Unable to read URL: http://ichart.finance.yahoo.com/table.csv?s=OTEX.TO&a=0&b=1&c=2015&d=11&e=13&f=2016&g=d&ignore=.csv
答案 0 :(得分:0)
这似乎是一个雅虎问题,因为它再次开始工作......
答案 1 :(得分:0)
在抓取时,你需要意识到许多提供商(特别是像谷歌,雅虎这样的大品牌)都有试图阻止自动请求的对策。速率限制是避免被击中的一个很好的步骤,但2秒甚至可能太快。
无论如何,使用请求等以更加可控的方式下载数据可能会更好,它们不会阻塞有效的HTTP响应,并会保存内容供您查看。
response = requests.get('http://yahoo.com/finance...')
# server 429's because it doesn't like you, or might 200 and just have malformed content
try:
parse(response.content)
except:
with open('dump.log', 'wb') as f:
f.write(response.content)
raise
然后你可以查看转储的内容并查看它失败的原因,和/或加载转储并将其内容传递给parse()
以检查它现在是否有效。
如果您的解析失败,您也可以将其切换并重试。
MAX_ATTEMPTS = 3
# ...
for ticker in tickers:
if not os.path.exists('stock_dfs/{}.csv'.format(ticker)):
for attempt in range(MAX_ATTEMPTS):
time.sleep(2)
try:
df = web.DataReader(ticker, 'yahoo', start, end)
except Exception as e:
if attempt < MAX_ATTEMPTS - 1:
print('Attempt {}: {}'.format(attempt + 1, str(e)))
else:
raise
else:
break