#!/usr/bin/python
# -*- coding: utf-8 -*-
# price_retrieval.py
from __future__ import print_function
import datetime
import warnings
import MySQLdb as mdb
import requests
# Open the module-level connection to the MySQL securities master.
# The functions in this script reach it through the global ``con``.
db_host = 'localhost'
db_user = 'sec_user'
db_pass = 'password'
db_name = 'securities_master'
con = mdb.connect(
    host=db_host, user=db_user, passwd=db_pass, db=db_name
)
def obtain_list_of_db_tickers():
    """
    Fetch every (id, ticker) pair stored in the symbol table.

    Runs the query on the module-level connection ``con`` and
    returns a list of two-element tuples, one per row returned.
    """
    with con:
        cursor = con.cursor()
        cursor.execute("SELECT id, ticker FROM symbol")
        rows = cursor.fetchall()
        tickers = []
        for row in rows:
            tickers.append((row[0], row[1]))
        return tickers
def get_daily_historic_data_yahoo(
    ticker, start_date=(2000, 1, 1), end_date=None
):
    """
    Download daily historic price rows for a ticker from Yahoo Finance.

    ticker: Yahoo Finance ticker symbol, e.g. "GOOG" for Google, Inc.
    start_date: Start date in (YYYY, M, D) format
    end_date: End date in (YYYY, M, D) format. When omitted, today's
        date is used, computed each time the function is called.

    Returns a list of 7-tuples. Element 0 is the row date parsed
    into a datetime.datetime; elements 1-6 are the next six CSV
    fields, left as strings in the order they appear in the response.

    Any failure while downloading or parsing is reported with a
    printed message rather than raised; the rows parsed before the
    failure (possibly none) are returned.
    """
    # Resolve the default here, not in the signature: a default
    # expression is evaluated only once, when the function is defined.
    if end_date is None:
        end_date = datetime.date.today().timetuple()[0:3]

    # Construct the Yahoo URL with the correct integer query parameters
    # for start and end dates. Note that the month parameters are
    # zero-based, hence the "- 1".
    ticker_tup = (
        ticker, start_date[1] - 1, start_date[2],
        start_date[0], end_date[1] - 1, end_date[2],
        end_date[0]
    )
    yahoo_url = "http://ichart.finance.yahoo.com/table.csv"
    yahoo_url += "?s=%s&a=%s&b=%s&c=%s&d=%s&e=%s&f=%s"
    yahoo_url = yahoo_url % ticker_tup

    # Bind the result before the try block so that a failed download
    # still returns a list instead of raising UnboundLocalError.
    prices = []

    # Try connecting to Yahoo Finance and obtaining the data.
    # This is deliberately best-effort: on any failure, print an
    # error message and carry on with whatever was parsed.
    try:
        response = requests.get(yahoo_url)
        # Do not try to parse an HTTP error page as CSV
        response.raise_for_status()
        # Skip the header row; splitlines() copes with a body that
        # does or does not end in a newline.
        for line in response.text.splitlines()[1:]:
            line = line.strip()
            if not line:
                continue
            p = line.split(',')
            prices.append(
                (datetime.datetime.strptime(p[0], '%Y-%m-%d'),
                 p[1], p[2], p[3], p[4], p[5], p[6])
            )
    except Exception as e:
        print("Could not download Yahoo data: %s" % e)
    return prices
def insert_daily_data_into_db(
    data_vendor_id, symbol_id, daily_data
):
    """
    Write one symbol's daily price rows to the daily_price table.

    data_vendor_id: Vendor ID stored with every row
    symbol_id: Symbol ID stored with every row
    daily_data: List of 7-tuples. Element 0 is stored as price_date
        and elements 1-6 as open, high, low, close, volume and
        adjusted close, in that order

    Every stored row also carries the current UTC time as both its
    created_date and its last_updated_date. The insert runs on the
    module-level connection ``con``.
    """
    # One timestamp shared by all rows of this batch
    timestamp = datetime.datetime.utcnow()

    # Prefix each price tuple with the IDs and the two timestamps
    rows = []
    for d in daily_data:
        rows.append(
            (data_vendor_id, symbol_id, d[0], timestamp, timestamp,
             d[1], d[2], d[3], d[4], d[5], d[6])
        )

    # Build the parameterised INSERT statement: eleven columns,
    # eleven placeholders
    column_str = (
        "data_vendor_id, symbol_id, price_date, created_date,\n"
        "last_updated_date, open_price, high_price, low_price,\n"
        "close_price, volume, adj_close_price"
    )
    placeholders = ", ".join(["%s"] * 11)
    final_str = "INSERT INTO daily_price (%s) VALUES (%s)" % (
        column_str, placeholders
    )

    # Insert all rows in a single executemany call
    with con:
        cur = con.cursor()
        cur.executemany(final_str, rows)
if __name__ == "__main__":
    # Silence all warnings. The ones expected here are the Data
    # Truncation warnings raised when Yahoo's precision is stored
    # into the Decimal(19,4) columns.
    warnings.filterwarnings('ignore')

    # Download and store the daily history of every ticker
    # currently held in the database
    tickers = obtain_list_of_db_tickers()
    total = len(tickers)
    for position, (symbol_id, ticker) in enumerate(tickers, 1):
        print(
            "Adding data for %s: %s out of %s" %
            (ticker, position, total)
        )
        yf_data = get_daily_historic_data_yahoo(ticker)
        insert_daily_data_into_db('1', symbol_id, yf_data)
    print("Successfully added Yahoo Finance pricing data to DB.")
#!/usr/bin/python
# -*- coding: utf-8 -*-
# retrieving_data.py
from __future__ import print_function
import pandas as pd
import MySQLdb as mdb
if __name__ == "__main__":
    # Open a connection to the securities master database
    db_host = 'localhost'
    db_user = 'sec_user'
    db_pass = 'password'
    db_name = 'securities_master'
    con = mdb.connect(
        host=db_host, user=db_user, passwd=db_pass, db=db_name
    )

    # Adjusted close prices stored for the GOOG ticker,
    # ordered by ascending price date
    query = (
        "SELECT dp.price_date, dp.adj_close_price\n"
        "FROM symbol AS sym\n"
        "INNER JOIN daily_price AS dp\n"
        "ON dp.symbol_id = sym.id\n"
        "WHERE sym.ticker = 'GOOG'\n"
        "ORDER BY dp.price_date ASC;"
    )

    # Load the result into a DataFrame indexed by price_date
    goog = pd.read_sql_query(query, con=con, index_col='price_date')

    # Show the last rows of the DataFrame
    print(goog.tail())
-- exchange: abbrev and name are mandatory; city, country, currency
-- and timezone_offset are optional. Rows are keyed by an
-- auto-incrementing integer id.
CREATE TABLE exchange (
  id                INT          NOT NULL AUTO_INCREMENT,
  abbrev            VARCHAR(32)  NOT NULL,
  name              VARCHAR(255) NOT NULL,
  city              VARCHAR(255) NULL,
  country           VARCHAR(255) NULL,
  currency          VARCHAR(64)  NULL,
  timezone_offset   TIME         NULL,
  created_date      DATETIME     NOT NULL,
  last_updated_date DATETIME     NOT NULL,
  PRIMARY KEY (id)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
-- data_vendor: only name and the two audit timestamps are
-- mandatory; website_url and support_email are optional. Rows are
-- keyed by an auto-incrementing integer id.
CREATE TABLE data_vendor (
  id                INT          NOT NULL AUTO_INCREMENT,
  name              VARCHAR(64)  NOT NULL,
  website_url       VARCHAR(255) NULL,
  support_email     VARCHAR(255) NULL,
  created_date      DATETIME     NOT NULL,
  last_updated_date DATETIME     NOT NULL,
  PRIMARY KEY (id)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
-- symbol: ticker and instrument are mandatory; exchange_id, name,
-- sector and currency are optional. exchange_id is indexed, but no
-- FOREIGN KEY constraint is declared on it.
CREATE TABLE symbol (
  id                INT          NOT NULL AUTO_INCREMENT,
  exchange_id       INT          NULL,
  ticker            VARCHAR(32)  NOT NULL,
  instrument        VARCHAR(64)  NOT NULL,
  name              VARCHAR(255) NULL,
  sector            VARCHAR(255) NULL,
  currency          VARCHAR(32)  NULL,
  created_date      DATETIME     NOT NULL,
  last_updated_date DATETIME     NOT NULL,
  PRIMARY KEY (id),
  KEY index_exchange_id (exchange_id)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
-- daily_price: one row per inserted price record. The vendor ID,
-- symbol ID, price date and audit timestamps are mandatory; every
-- price column and the volume are optional. data_vendor_id and
-- symbol_id are indexed, but no FOREIGN KEY or UNIQUE constraint is
-- declared, so the schema itself does not prevent duplicate rows.
CREATE TABLE daily_price (
  id                INT           NOT NULL AUTO_INCREMENT,
  data_vendor_id    INT           NOT NULL,
  symbol_id         INT           NOT NULL,
  price_date        DATETIME      NOT NULL,
  created_date      DATETIME      NOT NULL,
  last_updated_date DATETIME      NOT NULL,
  open_price        DECIMAL(19,4) NULL,
  high_price        DECIMAL(19,4) NULL,
  low_price         DECIMAL(19,4) NULL,
  close_price       DECIMAL(19,4) NULL,
  adj_close_price   DECIMAL(19,4) NULL,
  volume            BIGINT        NULL,
  PRIMARY KEY (id),
  KEY index_data_vendor_id (data_vendor_id),
  KEY index_symbol_id (symbol_id)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
#!/usr/bin/python
# -*- coding: utf-8 -*-
# insert_symbols.py
from __future__ import print_function
import datetime
from math import ceil
import bs4
import MySQLdb as mdb
import requests
def obtain_parse_wiki_snp500():
    """
    Download and parse the Wikipedia list of S&P500
    constituents using requests and BeautifulSoup.

    Returns a list of tuples ready to be added to the MySQL symbol
    table, each of the form
    (ticker, 'stock', name, sector, 'USD', created, last_updated),
    where both timestamps are the UTC time at which this function
    was called.

    Raises requests.HTTPError if the page request returns an HTTP
    error status.
    """
    # Stores the current time, for the created_date and
    # last_updated_date fields of every row
    now = datetime.datetime.utcnow()

    # Use requests and BeautifulSoup to download the
    # list of S&P500 companies and obtain the symbol table
    response = requests.get(
        "http://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    )
    # Fail on an HTTP error instead of parsing an error page
    response.raise_for_status()
    # Name the parser explicitly: otherwise BeautifulSoup picks
    # whichever parser happens to be installed and emits a warning
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    # This selects the first table, using CSS Selector syntax
    # and then ignores the header row ([1:])
    symbolslist = soup.select('table')[0].select('tr')[1:]

    # Obtain the symbol information for each
    # row in the S&P500 constituent table
    symbols = []
    for row in symbolslist:
        tds = row.select('td')
        # Skip rows that lack the data cells indexed below
        if len(tds) < 4:
            continue
        symbols.append(
            (
                tds[0].select('a')[0].text,  # Ticker
                'stock',
                tds[1].select('a')[0].text,  # Name
                tds[3].text,  # Sector
                'USD', now, now
            )
        )
    return symbols
def insert_snp500_symbols(symbols):
    """
    Store the given S&P500 symbol tuples in the MySQL symbol table.

    symbols: List of 7-tuples whose elements follow the column
        order ticker, instrument, name, sector, currency,
        created_date, last_updated_date
    """
    # Open a connection to the securities master database
    db_host = 'localhost'
    db_user = 'sec_user'
    db_pass = 'password'
    db_name = 'securities_master'
    con = mdb.connect(
        host=db_host, user=db_user, passwd=db_pass, db=db_name
    )

    # Build the parameterised INSERT statement: seven columns,
    # seven placeholders
    column_str = (
        "ticker, instrument, name, sector, "
        "currency, created_date, last_updated_date"
    )
    placeholders = ", ".join(["%s"] * 7)
    final_str = "INSERT INTO symbol (%s) VALUES (%s)" % (
        column_str, placeholders
    )

    # Insert all symbols in a single executemany call
    with con:
        cur = con.cursor()
        cur.executemany(final_str, symbols)
if __name__ == "__main__":
    # Scrape the constituent list, store it, then report the count
    snp500_symbols = obtain_parse_wiki_snp500()
    insert_snp500_symbols(snp500_symbols)
    symbol_count = len(snp500_symbols)
    print("%s symbols were successfully added." % symbol_count)
从下面提供的第一个屏幕截图中可以看出,“ticker”清楚地显示为 symbol 表中的列之一。但是,当我运行以下语句时,它返回“Empty set”(空集),好像该列并不存在(至少我是这样理解的):SHOW COLUMNS FROM symbol LIKE 'ticker';
第二个屏幕截图显示了我遇到问题的代码(错误是:“字段列表”中的未知列“ticker”)。正是这个错误促使我执行了屏幕截图1中所示的测试。
----答案1解决了以上问题
现在,在运行这段代码的最后一部分(名为“retrieving_data”的脚本)时,又出现了问题。脚本的输出应如下:
price_date adj_close_price
2015-06-09 526.69
2015-06-10 536.69
2015-06-11 534.61
2015-06-12 532.33
2015-06-15 527.20
相反,我收到了一堆运行时警告和这个微不足道的输出(请参见屏幕快照3):
Empty DataFrame
Columns: [adj_close_price]
Index: []
对于我可能做错的地方,有什么建议吗?预先感谢您的帮助。
更新:没关系,我意识到该脚本之所以不起作用,是因为 Yahoo 的数据下载服务已不再可用。
[屏幕截图] [1]:https://i.stack.imgur.com/hrKjG.png
[screenshot2] [2]:https://i.stack.imgur.com/QD9l6.png
[screenshot3] [3]:https://i.stack.imgur.com/XasR9.png
[screenshot4] [4]:https://i.stack.imgur.com/KrX8B.png
答案 0 :(得分:1)
您的屏幕截图显示了 symbol 表中的列名,例如 ‘id‘ 和 ‘ticker‘。据我从您的屏幕截图中可以看到,您的列名以 Unicode 字符 ‘ 开头和结尾,并且外面还用单引号引起来。您的列名实际上是 ‘ticker‘,但是查询引用的是名为 ticker 的列,因此找不到。
MySQL 使用反引号字符 ` 来界定列名,它不同于单引号字符。但是,只要列名不是保留字并且不包含空格,就完全不必对其加以界定。
您在创建表时,把那些额外的字符也带进了名称中。请仔细检查当初创建该表的方式。