Question

#!/usr/bin/python
# -*- coding: utf-8 -*-

# price_retrieval.py

from __future__ import print_function

import datetime
import warnings

import MySQLdb as mdb
import requests


# Module-wide MySQL connection shared by the functions below.
# Settings: server host, account name, account password, schema name.
db_host, db_user, db_pass, db_name = (
    'localhost', 'sec_user', 'password', 'securities_master'
)
con = mdb.connect(db_host, db_user, db_pass, db_name)


def obtain_list_of_db_tickers():
    """
    Obtains a list of the ticker symbols in the database.

    Reads every row of the symbol table through the module-level
    connection `con` and returns a list of (id, ticker) tuples.
    """
    # The cursor is managed explicitly instead of through `with con:`.
    # What `with con:` does differs between MySQLdb/mysqlclient releases
    # (some close the connection on exit), and this connection is shared
    # with insert_daily_data_into_db, so it must stay open.
    cur = con.cursor()
    try:
        cur.execute("SELECT id, ticker FROM symbol")
        data = cur.fetchall()
    finally:
        cur.close()
    return [(d[0], d[1]) for d in data]


def get_daily_historic_data_yahoo(
        ticker, start_date=(2000,1,1),
        end_date=None
    ):
    """
    Obtains data from Yahoo Finance and returns a list of tuples.

    ticker: Yahoo Finance ticker symbol, e.g. "GOOG" for Google, Inc.
    start_date: Start date in (YYYY, M, D) format
    end_date: End date in (YYYY, M, D) format. When omitted (None),
    today's date at call time is used.

    Returns a list of 7-tuples: the row date parsed as a datetime,
    followed by the remaining six CSV fields as strings. If the download
    or parsing fails, an error message is printed and the rows parsed
    so far (possibly none) are returned.
    """
    # Resolve the default here rather than in the signature, where it
    # would be evaluated only once, when the module is imported.
    if end_date is None:
        end_date = datetime.date.today().timetuple()[0:3]

    # Construct the Yahoo URL with the correct integer query parameters
    # for start and end dates. Months are sent zero-based (hence the -1).
    ticker_tup = (
        ticker, start_date[1]-1, start_date[2],
        start_date[0], end_date[1]-1, end_date[2],
        end_date[0]
    )
    yahoo_url = "http://ichart.finance.yahoo.com/table.csv"
    yahoo_url += "?s=%s&a=%s&b=%s&c=%s&d=%s&e=%s&f=%s"
    yahoo_url = yahoo_url % ticker_tup

    # Initialised before the try block so the final return is always
    # valid, even when the request itself raises.
    prices = []

    # Try connecting to Yahoo Finance and obtaining the data
    # On failure, print an error message.
    # NOTE(review): this endpoint is reported to no longer respond;
    # callers should expect an empty list in that case.
    try:
        # Drop the CSV header line and the trailing empty line
        yf_data = requests.get(yahoo_url).text.split("\n")[1:-1]
        for y in yf_data:
            p = y.strip().split(',')
            prices.append(
                (datetime.datetime.strptime(p[0], '%Y-%m-%d'),
                p[1], p[2], p[3], p[4], p[5], p[6])
            )
    except Exception as e:
        print("Could not download Yahoo data: %s" % e)
    return prices


def insert_daily_data_into_db(
        data_vendor_id, symbol_id, daily_data
    ):
    """
    Takes a list of tuples of daily data and adds it to the
    MySQL database. Appends the vendor ID and symbol ID to the data.

    data_vendor_id: Value stored in daily_price.data_vendor_id
    symbol_id: Value stored in daily_price.symbol_id
    daily_data: List of tuples of the OHLC data (with
    adj_close and volume), in the order
    (date, open, high, low, close, volume, adj_close)

    Returns None. The rows are committed on success; on any error the
    transaction is rolled back and the exception is re-raised.
    """
    # Nothing to insert (e.g. the download failed): leave the DB alone
    if not daily_data:
        return

    # Create the time now
    now = datetime.datetime.utcnow()

    # Amend the data to include the vendor ID and symbol ID
    daily_data = [
        (data_vendor_id, symbol_id, d[0], now, now,
        d[1], d[2], d[3], d[4], d[5], d[6])
        for d in daily_data
    ]

    # Create the insert strings
    column_str = """data_vendor_id, symbol_id, price_date, created_date, 
                 last_updated_date, open_price, high_price, low_price, 
                 close_price, volume, adj_close_price"""
    insert_str = ", ".join(["%s"] * 11)
    final_str = "INSERT INTO daily_price (%s) VALUES (%s)" % \
        (column_str, insert_str)

    # Carry out the INSERT INTO for every row with an explicit
    # commit/rollback. What `with con:` does differs between
    # MySQLdb/mysqlclient releases, so it is not relied on to commit,
    # and the shared connection is deliberately left open.
    cur = con.cursor()
    try:
        cur.executemany(final_str, daily_data)
        con.commit()
    except Exception:
        con.rollback()
        raise
    finally:
        cur.close()


if __name__ == "__main__":
    # Silence all warnings; the ones expected here are the Data
    # Truncation warnings raised when the Yahoo precision is stored
    # in Decimal(19,4) columns
    warnings.filterwarnings('ignore')

    # Walk every (id, ticker) pair in the symbol table, download its
    # daily history and store it under data vendor '1'
    tickers = obtain_list_of_db_tickers()
    lentickers = len(tickers)
    for position, pair in enumerate(tickers, 1):
        symbol_id, ticker = pair[0], pair[1]
        print(
            "Adding data for %s: %s out of %s" %
            (ticker, position, lentickers)
        )
        insert_daily_data_into_db(
            '1', symbol_id, get_daily_historic_data_yahoo(ticker)
        )
    print("Successfully added Yahoo Finance pricing data to DB.")

#!/usr/bin/python
# -*- coding: utf-8 -*-

# retrieving_data.py

from __future__ import print_function

import pandas as pd
import MySQLdb as mdb


if __name__ == "__main__":
    # Adjusted close prices for the GOOG ticker, oldest first,
    # joined from daily_price to symbol on the symbol id
    sql = """SELECT dp.price_date, dp.adj_close_price
             FROM symbol AS sym
             INNER JOIN daily_price AS dp
             ON dp.symbol_id = sym.id
             WHERE sym.ticker = 'GOOG'
             ORDER BY dp.price_date ASC;"""

    # Open the connection to the local securities master database
    db_host, db_user = 'localhost', 'sec_user'
    db_pass, db_name = 'password', 'securities_master'
    con = mdb.connect(db_host, db_user, db_pass, db_name)

    # Load the result set into a dataframe indexed by price_date
    # and show its last rows
    goog = pd.read_sql_query(sql, con=con, index_col='price_date')
    print(goog.tail())

-- exchange: one row per exchange record.
-- Only abbrev, name and the two audit timestamps are mandatory; the
-- location, currency and timezone columns are nullable.
-- NOTE(review): none of the Python loaders shown alongside this schema
-- insert into this table.
CREATE TABLE exchange (
  id int NOT NULL AUTO_INCREMENT,
  abbrev varchar(32) NOT NULL,
  name varchar(255) NOT NULL,
  city varchar(255) NULL,
  country varchar(255) NULL,
  currency varchar(64) NULL,
  timezone_offset time NULL,  -- stored as a TIME value, not a zone name
  created_date datetime NOT NULL,
  last_updated_date datetime NOT NULL,
  PRIMARY KEY (id)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

-- data_vendor: one row per data vendor record.
-- Only name and the two audit timestamps are mandatory.
-- NOTE(review): the price loader passes the vendor id '1' to its insert
-- routine; presumably a matching row is expected here -- confirm.
CREATE TABLE data_vendor (
  id int NOT NULL AUTO_INCREMENT,
  name varchar(64) NOT NULL,
  website_url varchar(255) NULL,
  support_email varchar(255) NULL,
  created_date datetime NOT NULL,
  last_updated_date datetime NOT NULL,
  PRIMARY KEY (id)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

-- symbol: one row per instrument, keyed by an auto-increment id.
-- ticker and instrument are mandatory; the descriptive columns are
-- nullable.
-- exchange_id is indexed but nullable and carries no FOREIGN KEY
-- constraint in this definition (presumably it refers to exchange.id --
-- confirm).
-- NOTE(review): ticker has no UNIQUE constraint, so loading the same
-- symbols twice creates duplicate rows.
CREATE TABLE symbol (
  id int NOT NULL AUTO_INCREMENT,
  exchange_id int NULL,
  ticker varchar(32) NOT NULL,
  instrument varchar(64) NOT NULL,
  name varchar(255) NULL,
  sector varchar(255) NULL,
  currency varchar(32) NULL,
  created_date datetime NOT NULL,
  last_updated_date datetime NOT NULL,
  PRIMARY KEY (id),
  KEY index_exchange_id (exchange_id)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

-- daily_price: one row per inserted daily price record.
-- data_vendor_id and symbol_id are mandatory and indexed, but no
-- FOREIGN KEY constraints are declared; queries join symbol_id to
-- symbol.id.
-- NOTE(review): there is no UNIQUE key on (symbol_id, price_date), so
-- re-running a loader that does plain INSERTs duplicates rows.
CREATE TABLE daily_price (
  id int NOT NULL AUTO_INCREMENT,
  data_vendor_id int NOT NULL,
  symbol_id int NOT NULL,
  price_date datetime NOT NULL,
  created_date datetime NOT NULL,
  last_updated_date datetime NOT NULL,
  -- Prices are fixed-point with 4 decimal places; values with more
  -- precision are truncated on insert.
  open_price decimal(19,4) NULL,
  high_price decimal(19,4) NULL,
  low_price decimal(19,4) NULL,
  close_price decimal(19,4) NULL,
  adj_close_price decimal(19,4) NULL,
  volume bigint NULL,
  PRIMARY KEY (id),
  KEY index_data_vendor_id (data_vendor_id),
  KEY index_symbol_id (symbol_id)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

#!/usr/bin/python
# -*- coding: utf-8 -*-

# insert_symbols.py

from __future__ import print_function

import datetime
from math import ceil

import bs4
import MySQLdb as mdb
import requests


def obtain_parse_wiki_snp500():
    """
    Download and parse the Wikipedia list of S&P500
    constituents using requests and BeautifulSoup.

    Returns a list of tuples to add to MySQL, each of the form
    (ticker, 'stock', name, sector, 'USD', created, last_updated),
    where both timestamps are the UTC time of this call.
    """
    # Stores the current time, for the created_date and
    # last_updated_date columns
    now = datetime.datetime.utcnow()

    # Use requests and BeautifulSoup to download the
    # list of S&P500 companies and obtain the symbol table
    response = requests.get(
        "http://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    )
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser is installed (and warns), so results can vary by machine.
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    # This selects the first table, using CSS Selector syntax
    # and then ignores the header row ([1:])
    symbolslist = soup.select('table')[0].select('tr')[1:]

    # Obtain the symbol information for each
    # row in the S&P500 constituent table.
    # NOTE(review): assumes cell 0 links the ticker, cell 1 links the
    # company name and cell 3 holds the sector -- confirm against the
    # current page layout.
    symbols = []
    for symbol in symbolslist:
        tds = symbol.select('td')
        symbols.append(
            (
                tds[0].select('a')[0].text,  # Ticker
                'stock',
                tds[1].select('a')[0].text,  # Name
                tds[3].text,  # Sector
                'USD', now, now
            )
        )
    return symbols


def insert_snp500_symbols(symbols):
    """
    Insert the S&P500 symbols into the MySQL database.

    symbols: List of 7-tuples in the order (ticker, instrument, name,
    sector, currency, created_date, last_updated_date)

    Opens its own connection, commits on success, rolls back and
    re-raises on error, and always closes the connection.
    """
    # Connect to the MySQL instance
    db_host = 'localhost'
    db_user = 'sec_user'
    db_pass = 'password'
    db_name = 'securities_master'
    con = mdb.connect(host=db_host, user=db_user, passwd=db_pass, db=db_name)

    # Create the insert strings
    column_str = "ticker, instrument, name, sector, currency, created_date, last_updated_date"
    insert_str = ", ".join(["%s"] * 7)
    final_str = "INSERT INTO symbol (%s) VALUES (%s)" % \
        (column_str, insert_str)

    # Carry out an INSERT INTO for every symbol with an explicit
    # commit/rollback. What `with con:` does differs between
    # MySQLdb/mysqlclient releases, so it is not relied on to commit.
    try:
        cur = con.cursor()
        cur.executemany(final_str, symbols)
        con.commit()
    except Exception:
        con.rollback()
        raise
    finally:
        # The connection is local to this function; release it
        con.close()


if __name__ == "__main__":
    # Scrape the constituents, store them, then report how many
    # tuples were produced
    snp500_symbols = obtain_parse_wiki_snp500()
    insert_snp500_symbols(snp500_symbols)
    added_count = len(snp500_symbols)
    print("%s symbols were successfully added." % added_count)

从下面提供的第一个屏幕截图中可以看出，“ticker”明显是 symbol 表中的列之一。但是，当我运行以下语句时，它返回“Empty set”（空集），好像该列不存在（至少我是这样理解的）：SHOW COLUMNS FROM symbol LIKE 'ticker';

第二个屏幕截图显示了我遇到问题的代码（错误是 Unknown column 'ticker' in 'field list'，即“字段列表”中存在未知列 'ticker'）。正是这个错误促使我做了屏幕截图 1 中所示的测试。

----答案1解决了以上问题

现在，运行这组代码的最后一部分（名为“retrieving_data”的脚本）时出现了错误。脚本的输出应如下：

price_date adj_close_price

2015-06-09 526.69

2015-06-10 536.69

2015-06-11 534.61

2015-06-12 532.33

2015-06-15 527.20

相反，我收到了一堆运行时警告和这个微不足道的输出（请参见屏幕快照3）： Empty DataFrame Columns: [adj_close_price] Index: []

关于我可能做错了什么的任何输入？预先感谢您的帮助。

更新：没关系，我意识到该脚本不起作用，因为yahoo不再起作用。

[屏幕截图] [1]：https://i.stack.imgur.com/hrKjG.png

[screenshot2] [2]：https://i.stack.imgur.com/QD9l6.png

[screenshot3] [3]：https://i.stack.imgur.com/XasR9.png

[screenshot4] [4]：https://i.stack.imgur.com/KrX8B.png

Answer 1

您的屏幕截图显示了 symbol 表中的列名，例如 ‘id‘ 和 ‘ticker‘。据我从屏幕截图中看到的，您的列名以 Unicode 字符 ‘（一种单引号）开头和结尾。也就是说，您的列实际名为 ‘ticker‘，而查询引用的是名为 ticker 的列，因此找不到。

MySQL 使用反引号字符 ` 来分隔列名，它不同于单引号字符。但是，只要列名不是保留字并且不包含空格，就完全不必对列名加分隔符。

您在创建表时，把这些多余的字符也带进了名称里。请仔细检查该表的创建方式。

无法在MySQL中显示列

1 个答案: