我正在尝试抓取四只股票的市盈率(P/E)。我不确定哪里出了问题,感谢你的帮助。
我认为问题出在我对贪婪和非贪婪限定符的使用上,也可能出在我从该URL的页面源码中复制内容来编写正则表达式的方式上。
import urllib
import re
# Ticker symbols whose trailing price-to-earnings ratio is scraped.
symbolslist = ["aapl", "spy", "goog", "nflx"]

# Built and compiled once, outside the loop, because it does not depend on the
# symbol. Changes relative to the previous pattern:
#   * The parentheses around "ttm" are escaped. Unescaped, they open a capture
#     group that matches the bare text "ttm", so the literal "(ttm)" in the
#     page could never match.
#   * The double quotes around "P/E " were dropped.
#     NOTE(review): presumably they came from a DOM inspector, which shows
#     text nodes in quotes -- confirm against the raw page source.
#   * \s* tolerates optional whitespace after "P/E" and before "</th>".
# The single remaining group is non-greedy so it stops at the first "</td>".
regex = (
    r'<th scope="row" width="48%">P/E\s*<span class="small">\(ttm\)</span>:'
    r'\s*</th><td class="yfnc_tabledata1">(.+?)</td>'
)
pattern = re.compile(regex)

for symbol in symbolslist:
    url = "http://finance.yahoo.com/q?s=" + symbol + "&q1=1"
    htmlfile = urllib.urlopen(url)
    try:
        htmltext = htmlfile.read()
    finally:
        # Release the connection even if the read fails.
        htmlfile.close()
    # With exactly one capture group, findall returns a list of the captured
    # strings (empty when the markup does not match).
    price_to_earnings = pattern.findall(htmltext)
    # A single parenthesised argument prints the same under the Python 2
    # print statement; the double space before "is" reproduces the output of
    # the original comma-separated print.
    print("The price to earnings of %s  is %s" % (symbol, price_to_earnings))
答案 0 :(得分:0)
此解决方案使用BeautifulSoup
import re
import urllib
try:
#Using bs4
from bs4 import BeautifulSoup
from bs4 import Tag
except ImportError:
#Using bs3
from BeautifulSoup import BeautifulSoup
from BeautifulSoup import Tag
def check_th(iarg):
    """Return True when *iarg* is the ``<th scope="row">`` cell labelled P/E.

    Args:
        iarg: A parsed tag exposing ``name``, ``attrs`` and ``contents``.

    Returns:
        True if the tag is named ``th``, has the attribute ``scope="row"``
        and at least one of its children stringifies to text containing
        ``P/E``; False otherwise.
    """
    if iarg.name != u"th":
        return False
    # BeautifulSoup 3 exposes attrs as a list of (name, value) pairs, bs4 as a
    # dict. dict() normalises both; the previous set-of-pairs subset test
    # could never succeed against the bs4 dict (a set of its keys only).
    if dict(iarg.attrs).get(u"scope") != u"row":
        return False
    return any(re.search(r"\s*P/E\s*", str(x)) for x in iarg.contents)
# Ordered chain of tag predicates: the table with id "table2", then its rows,
# then the P/E header cell.
tag_locations = [
    # dict() accepts both the BeautifulSoup 3 list of (name, value) pairs and
    # the bs4 attribute dict; the previous set-of-pairs subset test never
    # matched under bs4, where set(attrs) holds only the attribute names.
    lambda x: x.name == u"table" and dict(x.attrs).get(u"id") == u"table2",
    lambda x: x.name == u"tr",
    check_th,
]
# Ticker symbols whose trailing price-to-earnings ratio is scraped.
symbolslist = ["aapl", "spy", "goog", "nflx"]


def _is_pe_value_cell(tag):
    """Return True for a ``<td>`` whose class is ``yfnc_tabledata1``."""
    if tag.name != u"td":
        return False
    # dict() normalises the BeautifulSoup 3 list of (name, value) pairs and
    # the bs4 attribute dict. BeautifulSoup 3 yields the class as a plain
    # string, bs4 as a list of class names, so both spellings are accepted.
    css_class = dict(tag.attrs).get(u"class")
    return css_class in (u"yfnc_tabledata1", [u"yfnc_tabledata1"])


for symbol in symbolslist:
    url = "http://finance.yahoo.com/q?s=" + symbol + "&q1=1"
    htmlfile = urllib.urlopen(url)
    try:
        htmltext = htmlfile.read()
    finally:
        # Release the connection even if the read fails.
        htmlfile.close()
    soup = BeautifulSoup(htmltext)

    # Narrow the search step by step: each predicate is applied to the
    # descendants of every tag matched by the previous predicate.
    found_tags = [soup]
    for tag_location in tag_locations:
        if found_tags:
            found_tags = [
                match
                for found_tag in found_tags
                for match in found_tag.findAll(tag_location)
            ]

    if found_tags:
        # The ratio is in the value cell that follows the first matching
        # header cell.
        data_tag = found_tags[0].findNextSibling(_is_pe_value_cell)
        # findNextSibling returns None when no sibling qualifies; skip the
        # symbol instead of failing on data_tag.text.
        if data_tag is not None:
            print("The price to earnings of %(company)s is %(ratio)s" % {"company": symbol, "ratio": data_tag.text})