我从 GitHub 上下载了一个用于分析和排名股票的 Python 脚本。我终于让它运行起来了,但不幸的是,EV/EBITDA 和股东收益率始终被填充为各自的默认值,分别是 1000 和 0。
我花了最近几天尝试排查故障,在此过程中学到了很多东西,但遗憾的是没有成功。我认为问题出在抓取(scraper)部分:它要么试图从不存在的行中提取数据,要么引用了不正确的 HTML。下面粘贴我认为可能出错的两个代码片段,其余文件都在上面链接的仓库中。
主要文件
from sys import stdout
from Stock import Stock
import Pickler
import Scraper
import Rankings
import Fixer
import Writer
# Importing data is slow, and a connection error halfway through would lose
# everything fetched so far. The pickler serializes imported data so that a
# later run can resume from it.
pklFileName = 'tmpstocks.pkl'
pickler = Pickler.Pickler()

# Load a previously pickled file if one exists and the user asks for it.
# Otherwise stocks is an empty list.
stocks = pickler.loadPickledFile(pklFileName)

# Fetch the first FINVIZ screener page. The query string selects the preset
# filters and columns (open the link directly for details).
url = ('http://finviz.com/screener.ashx?v=152&f=cap_smallover&'
       'ft=4&c=0,1,2,6,7,10,11,13,14,45,65')
html = Scraper.importHtml(url)

# Read the total page count from the page selector. nPages stays at -1 when
# the selector line is not present in the HTML.
nPages = -1
for line in html:
    if not line.startswith('<option selected="selected" value=1>Page'):
        continue
    # The count sits between the first '/' and the next '<'
    b1 = line.index('/') + 1
    b2 = line.index('<', b1)
    nPages = int(line[b1:b2])
    break
# Parse data from the table on the first page of stocks and store it in the
# database, but only if no data was pickled
if pickler.source == Pickler.PickleSource.NOPICKLE:
    Scraper.importFinvizPage(html, stocks)

# The first page of stocks (20 stocks) has been imported. Now import the
# rest of them
source = Pickler.PickleSource.FINVIZ
iS = pickler.getIndex(source, 1, nPages + 1)

# NOTE(review): i runs up to nPages inclusive and requests row i*20+1. With 20
# rows per page the final request may start past the last row - confirm
# against the live screener before changing the bounds.
for i in range(iS, nPages + 1):
    try:
        # Print progress message
        print('Importing FINVIZ metrics from page ' + str(i) + ' of ' +
              str(nPages) + '...', file=stdout, flush=True)

        # Scrape data as before
        url = ('http://finviz.com/screener.ashx?v=152&f=cap_smallover&ft=4&r=' +
               str(i*20+1) + '&c=0,1,2,6,7,10,11,13,14,45,65')
        html = Scraper.importHtml(url)

        # Import stock metrics from the page into a buffer so that a page
        # which fails halfway does not leave partial rows in stocks
        bufferList = []
        Scraper.importFinvizPage(html, bufferList)
    except (Exception, KeyboardInterrupt) as err:
        # Report what went wrong instead of discarding it, then pickle stocks
        # for later loading. KeyboardInterrupt is included so that Ctrl-C
        # still saves progress, as the original bare except did.
        print('Import failed on FINVIZ page ' + str(i) + ': ' + repr(err),
              file=stdout, flush=True)
        pickler.setError(source, i, stocks)
        break
    else:
        # No errors encountered, extend stocks list with the buffer
        stocks.extend(bufferList)

# FINVIZ stock metrics successfully imported
print('\n')
# Store number of stocks in list
nStocks = len(stocks)

# Handle pickle file
source = Pickler.PickleSource.YHOOEV
iS = pickler.getIndex(source, 0, nStocks)

# Grab EV/EBITDA metrics from Yahoo! Finance
for i in range(iS, nStocks):
    try:
        # Print progress message
        print('Importing Key Statistics for ' + stocks[i].tick +
              ' (' + str(i) + '/' + str(nStocks - 1) + ') from Yahoo! Finance...',
              file=stdout, flush=True)

        # Scrape data from Yahoo! Finance
        url = 'http://finance.yahoo.com/q/ks?s=' + stocks[i].tick + '+Key+Statistics'
        html = Scraper.importHtml(url)

        # Parse data.
        # NOTE(review): if every stock ends up with the 1000 default, the
        # fetched page probably no longer contains the marker strings below -
        # verify against the current page markup.
        for line in html:
            if 'There is no Key Statistics' in line or \
                    'Get Quotes Results for' in line or \
                    'Changed Ticker Symbol' in line or \
                    '</html>' in line:
                # Non-financial file (e.g. mutual fund) or
                # Ticker not located or
                # End of html page
                stocks[i].evebitda = 1000
                break
            elif 'Enterprise Value/EBITDA' in line:
                # Line contains EV/EBITDA data
                stocks[i].evebitda = Scraper.readYahooEVEBITDA(line)
                break
    except (Exception, KeyboardInterrupt) as err:
        # Report what went wrong instead of discarding it, then pickle stocks
        # for later loading. KeyboardInterrupt is included so that Ctrl-C
        # still saves progress, as the original bare except did.
        print('Import failed for ' + stocks[i].tick + ': ' + repr(err),
              file=stdout, flush=True)
        pickler.setError(source, i, stocks)
        break

# Yahoo! Finance EV/EBITDA successfully imported
print('\n')
# Handle pickle file
source = Pickler.PickleSource.YHOOBBY
iS = pickler.getIndex(source, 0, nStocks)

# Grab BBY metrics from Yahoo! Finance
for i in range(iS, nStocks):
    try:
        # Print progress message
        print('Importing Cash Flow for ' + stocks[i].tick +
              ' (' + str(i) + '/' + str(nStocks - 1) + ') from Yahoo! Finance...',
              file=stdout, flush=True)

        # Scrape data from Yahoo! Finance
        url = 'http://finance.yahoo.com/q/cf?s=' + stocks[i].tick + '&ql=1'
        html = Scraper.importHtml(url)

        # Parse data. The total stays 0 when no Sale/Purchase line is found,
        # which yields a BBY of 0 below.
        # NOTE(review): if every stock ends up with a BBY of 0, the fetched
        # page probably no longer contains the marker strings below - verify
        # against the current page markup.
        totalBuysAndSells = 0
        for line in html:
            if 'There is no Cash Flow' in line or \
                    'Get Quotes Results for' in line or \
                    'Changed Ticker Symbol' in line or \
                    '</html>' in line:
                # Non-financial file (e.g. mutual fund) or
                # Ticker not located or
                # End of html page
                break
            elif 'Sale Purchase of Stock' in line:
                # Line contains Sale/Purchase of Stock information
                totalBuysAndSells = Scraper.readYahooBBY(line)
                break

        # Calculate BBY as a percentage of current market cap
        stocks[i].bby = round(-totalBuysAndSells / stocks[i].mktcap * 100, 2)
    except (Exception, KeyboardInterrupt) as err:
        # Report what went wrong instead of discarding it, then pickle stocks
        # for later loading. KeyboardInterrupt is included so that Ctrl-C
        # still saves progress, as the original bare except did.
        print('Import failed for ' + stocks[i].tick + ': ' + repr(err),
              file=stdout, flush=True)
        pickler.setError(source, i, stocks)
        break
# Yahoo! Finance BBY successfully imported. Rank and save only when every
# import stage finished without an error.
if not pickler.hasErrorOccurred:
    # All data imported
    print('\n')
    print('Fixing screener errors...')

    # A number of stocks may have broken metrics. Fix these (i.e. assign out-of-
    # bounds values) before sorting
    stocks = Fixer.fixBrokenMetrics(stocks)

    print('Ranking stocks...')

    # Calculate shareholder yield
    for i in range(nStocks):
        stocks[i].shy = stocks[i].div + stocks[i].bby

    # Time to rank! Lowest value gets 100.
    # NOTE(review): the arithmetic below presumes rankByValue returns an
    # array-like supporting elementwise operations - confirm in Rankings.
    rankPE = 100 * (1 - Rankings.rankByValue([o.pe for o in stocks]) / nStocks)
    rankPS = 100 * (1 - Rankings.rankByValue([o.ps for o in stocks]) / nStocks)
    rankPB = 100 * (1 - Rankings.rankByValue([o.pb for o in stocks]) / nStocks)
    rankPFCF = 100 * (1 - Rankings.rankByValue([o.pfcf for o in stocks]) / nStocks)
    rankEVEBITDA = 100 * (1 - Rankings.rankByValue([o.evebitda for o in stocks]) / nStocks)

    # Shareholder yield ranked with highest getting 100
    rankSHY = 100 * (Rankings.rankByValue([o.shy for o in stocks]) / nStocks)

    # Rank total stock valuation
    rankStock = rankPE + rankPS + rankPB + rankPFCF + rankEVEBITDA + rankSHY

    # Rank 'em
    rankOverall = Rankings.rankByValue(rankStock)

    # Calculate Value Composite - higher the better
    valueComposite = 100 * rankOverall / len(rankStock)

    # Reverse indices - lower index -> better score
    rankOverall = [len(rankStock) - 1 - x for x in rankOverall]

    # Assign to stocks
    for i in range(nStocks):
        stocks[i].rank = rankOverall[i]
        stocks[i].vc = round(valueComposite[i], 2)

    print('Sorting stocks...')

    # Sort all stocks by normalized rank. Sorting on the rank alone keeps
    # tied ranks from falling back to comparing the stock objects themselves.
    stocks = [x for (y, x) in sorted(zip(rankOverall, stocks),
                                     key=lambda pair: pair[0])]

    # Sort top decile by momentum factor. O'Shaughnessey historically uses 25
    # stocks to hold. The top decile is printed, and the user may select the
    # top 25 (or any n) from the .csv file.
    dec = int(nStocks / 10)

    # Highest momentum first. sorted() is stable, so equal momentums keep
    # their value-rank order, and no sentinel value is needed to mark stocks
    # that were already picked.
    topDecile = sorted(stocks[:dec], key=lambda o: o.mom, reverse=True)

    print('Saving stocks...')

    # Save momentum-weighted top decile
    topCsvPath = 'top.csv'
    Writer.writeCSV(topCsvPath, topDecile)

    # Save results to .csv
    allCsvPath = 'stocks.csv'
    Writer.writeCSV(allCsvPath, stocks)

    print('\n')
    print('Complete.')
    print('Top decile (sorted by momentum) saved to: ' + topCsvPath)
    print('All stocks (sorted by trending value) saved to: ' + allCsvPath)
抓取模块(Scraper)
import re
from urllib.request import urlopen
from Stock import Stock
def importHtml(url):
    """Fetch the document at ``url`` and return it as a list of lines.

    The response body is decoded as UTF-8 and split on newline characters.

    Args:
        url: Address to open with ``urlopen``.

    Returns:
        A list of strings, one per line of the response body.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
        Any error raised by ``urlopen`` propagates unchanged.
    """
    # The context manager closes the connection even when read() or decode()
    # raises, so repeated calls do not leak open responses.
    with urlopen(url, data=None) as response:
        html = response.read().decode('utf-8').split('\n')
    return html
def importFinvizPage(html, stocks):
    """Import every data row of a FINVIZ HTML page into ``stocks``.

    Args:
        html: Iterable of HTML lines for one screener page.
        stocks: List that receives one Stock object per data row.

    Returns:
        None. ``stocks`` is extended in place.
    """
    row_prefix = '<td height="10"'
    table_started = False
    for row in html:
        if row.startswith(row_prefix):
            # Data row: hand it to the row parser
            table_started = True
            _readFinvizLine(row, stocks)
        # Stop at the first short line (under 10 characters) once at least
        # one data row has been seen
        if table_started and len(row) < 10:
            break
    return
def _readFinvizLine(line, stocks):
    """Build a Stock from one FINVIZ data row and append it to ``stocks``.

    Args:
        line: HTML text of a single screener table row.
        stocks: List that receives the new Stock object.

    Returns:
        None. ``stocks`` is extended in place.
    """
    # Flatten the row to text with a delimiter position list
    (text, marks) = _parseHtml(line)

    def cell(k, trim=0):
        # Text between delimiter k and delimiter k + 1, dropping `trim`
        # trailing characters (a unit suffix)
        return text[marks[k] + 1 : marks[k + 1] - trim]

    entry = Stock()

    # Ticker symbol and company name
    entry.tick = cell(1)
    entry.name = cell(2)

    # Market cap carries a one-letter multiplier suffix: 'B' means billions,
    # anything else is treated as millions
    scale = 1000000000 if text[marks[4] - 1] == 'B' else 1000000
    entry.mktcap = scale * _toFloat(cell(3, 1))

    # Valuation ratios: P/E, P/S, P/B, P/FCF
    entry.pe = _toFloat(cell(4))
    entry.ps = _toFloat(cell(5))
    entry.pb = _toFloat(cell(6))
    entry.pfcf = _toFloat(cell(7))

    # Dividend yield and 6-month relative price strength; the last character
    # of each field is dropped before conversion
    entry.div = _toFloat(cell(8, 1))
    entry.mom = _toFloat(cell(9, 1))

    # Current stock price (read from delimiter 11, not 10)
    entry.price = _toFloat(cell(11))

    stocks.append(entry)
    return
def _toFloat(line):
"Converts a string to a float. Returns NaN if the line can't be converted"
try:
num = float(line)
except:
num = float('NaN')
return num
def readYahooEVEBITDA(line):
    """Return the EV/EBITDA value from a Yahoo! Finance HTML line.

    Args:
        line: HTML text containing the 'Enterprise Value/EBITDA' label.

    Returns:
        The value as a float. NaN is returned when the label is not found
        right after a delimiter, when the value cell is missing, or when the
        value text is not numeric.
    """
    label = 'Enterprise Value/EBITDA'

    # Parse html
    (stkraw, dl) = _parseHtml(line)

    # The value lies between the two delimiters after the label, so only
    # positions with two following delimiters can hold a complete entry.
    for i in range(len(dl) - 2):
        if stkraw.startswith(label, dl[i] + 1):
            return _toFloat(stkraw[dl[i + 1] + 1 : dl[i + 2]])

    # Label not found: report "no value" rather than raising on an unbound
    # local, matching how unparsable values are reported.
    return float('NaN')
def readYahooBBY(line):
    """Return total buys and sells from a Yahoo! Finance HTML line.

    The result still needs to be divided by market cap.

    Args:
        line: HTML text containing the 'Sale Purchase of Stock' row.

    Returns:
        Sum of all figures found in the row, multiplied by 1000. Figures in
        parentheses count as negative. Returns 0 when no figure is found.

    Raises:
        IndexError: If more figure starts than figure ends are located.
        ValueError: If a located span is not a valid number.
    """
    # Line also contains Borrowings details - remove it all
    if 'Net Borrowings' in line:
        line = line[:line.find('Net Borrowings')]
    # Trim prior data
    line = line[line.find('Sale Purchase of Stock'):]

    # Determine if buys or sells, replace open parentheses:
    #       (#,###) -> -#,###
    line = re.sub(r'[(]', '-', line)
    # Eliminate commas, pipes and close parentheses: -#,### -> -####
    line = re.sub(r'[,|)]', '', line)
    # This is a character class, not a tag pattern: each single '<', '.',
    # '*', '?', '>' or '|' becomes a comma. Tag names stay in the text, but
    # every number ends up surrounded by commas.
    line = re.sub(r'[<.*?>|]', ',', line)
    line = re.sub(' ', ',', line)

    # Locate the beginnings of each quarterly Sale Purchase point: a comma,
    # then either digits and a comma, or one arbitrary character (such as
    # the sign) and digits. Raw strings keep '\d' a regex escape rather than
    # an invalid string escape.
    starts = [m.start() for m in re.finditer(r',\d+,|,.\d+', line)]
    # Locate the ends of each quarterly Sale Purchase point
    ends = [m.start() for m in re.finditer(r'\d,', line)]

    # Sum all buys and sells across the year
    tot = 0
    for i, start in enumerate(starts):
        # x1000 because all numbers are in thousands
        tot = tot + float(line[start + 1 : ends[i] + 1]) * 1000
    return tot
def _parseHtml(line):
"Parses the HTML line by </td> breaks and returns the delimited string"
# Replace </td> breaks with placeholder, '`'
ph = '`'
rem = re.sub('</td>', ph, line)
# The ticker symbol initial delimiter is different
# Remove all other remaining HTML data
stkraw = re.sub('<.*?>', '', rem)
# Replace unbalanced HTML
stkraw = re.sub('">', '`', stkraw)
# Find the placeholders
dl = [m.start() for m in re.finditer(ph, stkraw)]
return (stkraw, dl)
如果有人有任何意见或更好的方法,如beautifulsoup,我真的很感激!我对任何有用的教程都非常开放。我的目的是提高我的编程能力,并有一个有效的股票筛选器。
答案 0 :(得分:0)
我在 Python 和 Matlab 中抓取雅虎数据时也遇到了同样的问题。作为一种变通办法,我用 VBA 编写了一个宏,通过访问每只股票的关键统计页面来获取雅虎的所有 EV/EBITDA 数据。然而,对市值超过 2 亿美元的 3,000 多只股票全部运行一遍大约需要一天时间,这并不现实。
我曾尝试在网上的各种股票筛选器中查找 EV/EBITDA,但它们要么不提供该指标,要么在不付费的情况下只允许下载几百只股票的数据。Busy Stock 的筛选器在这方面似乎是最好的,但它的 EV/EBITDA 数字与雅虎的并不一致,这让我担心他们使用了不同的计算方法。
我建议的一个解决方案是使用 Quantopian 中的 Trending Value 算法,它是免费的。您可以在此处找到代码:https://www.quantopian.com/posts/oshaugnessy-what-works-on-wall-street 。Quantopian 允许您将算法回测至 2002 年,也可以对它进行实时测试。