Question

我正在学习如何从维基百科的标准普尔500成分股页面抓取数据，过程中遇到了一些错误。我的目的是从Wikipedia获取数据并用Python进行分析，代码完全按照教程视频编写。我是Python和编程的初学者。
这是我的代码

import bs4 as bs
import datetime as dt
import os
import pandas as pd
import pandas_datareader.data as web
import pickle
import requests


def save_sp500_tickers():
    """Scrape the S&P 500 ticker column from Wikipedia and pickle it.

    Downloads the "List of S&P 500 companies" page, takes the text of the
    first ``<td>`` of every row after the first one in the table matched by
    class ``wikitable sortable``, writes the resulting list to
    ``sp500tickers.pickle`` and prints it.

    Returns:
        The list of cell texts collected from the table.
    """
    resp = requests.get(
        'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text, "lxml")
    table = soup.find('table', {'class': 'wikitable sortable'})
    tickers = []
    # [1:] skips the first row of the table (presumably the header row).
    for row in table.findAll('tr')[1:]:
        # NOTE(review): the raw cell text is stored as-is; no whitespace is
        # stripped here, so any newline inside the cell ends up in the ticker.
        ticker = row.findAll('td')[0].text
        tickers.append(ticker)

    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)
        print(tickers)
    return tickers
   #return tickers


# save_sp500_tickers()


def get_data_from_yahoo(reload_sp500=False):
    """Download daily price data for every stored ticker into ``stock_dfs/``.

    Args:
        reload_sp500: When true, rebuild the ticker list by calling
            ``save_sp500_tickers()``; otherwise load it from
            ``sp500tickers.pickle``.

    Each ticker is requested from the ``'yahoo'`` source of
    ``pandas_datareader`` for 2000-01-01 through 2016-12-31 and written to
    ``stock_dfs/<ticker>.csv``. Tickers whose CSV already exists are skipped.
    """
    if reload_sp500:
        tickers = save_sp500_tickers()
    else:
        # Reuse the ticker list cached by an earlier save_sp500_tickers() run.
        with open("sp500tickers.pickle", "rb") as f:
            tickers = pickle.load(f)

    if not os.path.exists('stock_dfs'):
        os.makedirs('stock_dfs')

    start = dt.datetime(2000,1,1)
    end = dt.datetime(2016,12,31)

    for ticker in tickers:
        # An existing CSV is treated as "already downloaded".
        if not os.path.exists('stock_dfs/{}.csv'.format(ticker)):
            # No error handling here: an exception from DataReader propagates
            # and ends the loop.
            df = web.DataReader(ticker, 'yahoo', start, end)
            df.to_csv('stock_dfs/{}.csv'.format(ticker))
        else:
            print('Already have {}'.format(ticker))


# reload_sp500 keeps its default (False), so tickers are read from
# sp500tickers.pickle rather than scraped again.
get_data_from_yahoo()

我已经修正了所有格式和缩进错误，但终端仍然报错：

Traceback (most recent call last):
  File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas_datareader\yahoo\daily.py", line 157, in _read_one_data
    data = j["context"]["dispatcher"]["stores"]["HistoricalPriceStore"]
KeyError: 'HistoricalPriceStore'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:/Users/CNTHWAN8/Desktop/personal/Python/salesorder/Python/sp500 companites.py", line 52, in <module>
    get_data_from_yahoo()
  File "c:/Users/CNTHWAN8/Desktop/personal/Python/salesorder/Python/sp500 companites.py", line 46, in get_data_from_yahoo
    df = web.DataReader(ticker, 'yahoo', start, end)
  File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\util\_decorators.py", line 208, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas_datareader\data.py", line 387, in DataReader
    session=session,
  File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas_datareader\base.py", line 251, in read
    df = self._read_one_data(self.url, params=self._get_params(self.symbols))
  File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas_datareader\yahoo\daily.py", line 160, in _read_one_data
    raise RemoteDataError(msg.format(symbol, self.__class__.__name__))
pandas_datareader._utils.RemoteDataError: No data fetched for symbol MMM
 using YahooDailyReader

C:\Users\CNTHWAN8\Desktop\personal\Python\salesorder\Python>C:/Users/CNTHWAN8/AppData/Local/Programs/Python/Python37/python.exe "c:/Users/CNTHWAN8/Desktop/personal/Python/salesorder/Python/sp500 companites.py"
Traceback (most recent call last):
  File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas_datareader\yahoo\daily.py", line 157, in _read_one_data
    data = j["context"]["dispatcher"]["stores"]["HistoricalPriceStore"]
KeyError: 'HistoricalPriceStore'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:/Users/CNTHWAN8/Desktop/personal/Python/salesorder/Python/sp500 companites.py", line 51, in <module>
    get_data_from_yahoo()
  File "c:/Users/CNTHWAN8/Desktop/personal/Python/salesorder/Python/sp500 companites.py", line 45, in get_data_from_yahoo
    df = web.DataReader(ticker, 'yahoo', start, end)
  File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\util\_decorators.py", line 208, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas_datareader\data.py", line 387, in DataReader
    session=session,
  File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas_datareader\base.py", line 251, in read
    df = self._read_one_data(self.url, params=self._get_params(self.symbols))
  File "C:\Users\CNTHWAN8\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas_datareader\yahoo\daily.py", line 160, in _read_one_data
    raise RemoteDataError(msg.format(symbol, self.__class__.__name__))
pandas_datareader._utils.RemoteDataError: No data fetched for symbol MMM
 using YahooDailyReader

我不太明白这里发生了什么，也不知道该如何解决。有人能帮帮我吗？非常感谢。

Answer 1

问题在于ticker的末尾带有\n——您必须用strip()把它去掉，才能得到例如MMM，而不是MMM\n。

# strip() removes the leading/trailing whitespace (including "\n") from the cell text.
ticker = row.findAll('td')[0].text.strip()

此后，它开始创建csv文件。

还有其他问题。

对于BKR（以及其他少数几个代码），会出现错误KeyError: 'Date'。这可能是从服务器读取数据时出了问题，需要用try/except来跳过这些代码。

# Catch any failure from the download or the CSV write and print it
# instead of letting it propagate.
try:
    df = web.DataReader(ticker, 'yahoo', start, end)
    df.to_csv('stock_dfs/{}.csv'.format(ticker))
except Exception as ex:
    print('Error:', ex)

import bs4 as bs
import datetime as dt
import os
import pandas as pd
import pandas_datareader.data as web
import pickle
import requests


def save_sp500_tickers():
    """Scrape the S&P 500 ticker symbols from Wikipedia and cache them.

    Downloads the "List of S&P 500 companies" page, reads the first ``<td>``
    of every data row in the table matched by class ``wikitable sortable``,
    strips surrounding whitespace from each symbol, and pickles the list to
    ``sp500tickers.pickle``.

    Returns:
        list[str]: The ticker symbols in table order.

    Raises:
        requests.HTTPError: If Wikipedia answers with an HTTP error status.
    """
    resp = requests.get(
        'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    # Fail here with a clear HTTP error instead of parsing an error page and
    # crashing later on a missing table.
    resp.raise_for_status()

    soup = bs.BeautifulSoup(resp.text, "lxml")
    table = soup.find('table', {'class': 'wikitable sortable'})

    tickers = []

    # [1:] skips the first row of the table (the header row).
    for row in table.findAll('tr')[1:]:
        cells = row.findAll('td')
        if not cells:
            # Rows made only of <th> cells carry no ticker; skip them rather
            # than raising IndexError.
            continue
        # The cell text ends with "\n"; strip it so "MMM\n" becomes "MMM".
        tickers.append(cells[0].text.strip())

    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)

    print(tickers)

    return tickers


def get_data_from_yahoo(reload_sp500=False):
    """Download daily price data for every cached ticker into ``stock_dfs/``.

    Args:
        reload_sp500: When true, rebuild the ticker list with
            ``save_sp500_tickers()``. When false, the list is loaded from
            ``sp500tickers.pickle``; if that file does not exist yet, the
            list is rebuilt as well instead of failing.

    Each ticker is requested from the ``'yahoo'`` source of
    ``pandas_datareader`` for 2000-01-01 through 2016-12-31 and written to
    ``stock_dfs/<ticker>.csv``. Tickers whose CSV already exists are skipped,
    and a ticker whose download fails is reported and skipped.
    """
    # A first run has no cached list yet; scrape it rather than crashing with
    # FileNotFoundError.
    if reload_sp500 or not os.path.exists("sp500tickers.pickle"):
        tickers = save_sp500_tickers()
    else:
        with open("sp500tickers.pickle", "rb") as f:
            tickers = pickle.load(f)

    os.makedirs('stock_dfs', exist_ok=True)

    start = dt.datetime(2000, 1, 1)
    end = dt.datetime(2016, 12, 31)

    for ticker in tickers:

        print(ticker)

        csv_path = 'stock_dfs/{}.csv'.format(ticker)

        if os.path.exists(csv_path):
            print('Already have {}'.format(ticker))
            continue

        # Deliberately broad: the remote source fails in several different
        # ways for individual symbols, and one bad ticker must not abort the
        # whole download run.
        try:
            df = web.DataReader(ticker, 'yahoo', start, end)
            df.to_csv(csv_path)
        except Exception as ex:
            print('Error:', ex)


# True forces the ticker list to be scraped again, replacing any previously pickled list.
get_data_from_yahoo(True)

KeyError: 'HistoricalPriceStore'

1 个答案: