更新:这是我的代码
import urllib2
import datetime
import re
from bs4 import BeautifulSoup
# Look up home baseball games on the Texas Sports schedule page and print the
# first CSS class of every child element in the rows matching a given date.

# Today's date as M/DD/YYYY (month unpadded, day zero-padded).  Built by hand
# because the '%-m' strftime flag is a platform extension and is not portable.
today = datetime.date.today()
date = '{0}/{1:02d}/{2}'.format(today.month, today.day, today.year)
# Regex alternation for the weekday abbreviations Mon. through Fri.
# (raw string so the backslashes reach the regex engine untouched).
validDay = r"Mon\.|Tue\.|Wed\.|Thu(r)?(s)?\.|Fri\."
website = "http://www.texassports.com/schedule.aspx?path=baseball"
opener = urllib2.build_opener()
# add headers that make it look like I'm a browser
opener.addheaders = [('User-Agent', 'Mozilla/5.0')]
page = opener.open(website)
try:
    # turn page into html object
    soup = BeautifulSoup(page, 'html.parser')
finally:
    # the response is fully parsed at this point; release the connection
    page.close()
# print(soup.prettify())
# get all home games
all_rows = soup.find_all('tr', class_='schedule_home_tr')
# see if any game is today
# To match the real date instead of the test value, use:
#   re.compile('.*({}).*'.format(date))
# hard coding for testing weekend
datePattern = re.compile('3/11/2017')
entryForToday = [t for t in all_rows if t.findAll('nobr', text=datePattern)]
# Class name of the time/result cell; not consulted yet in this snippet.
time = "schedule_dgrd_time/result"
for elements in entryForToday:
    for element in elements:
        # Children of a row are a mix of tags and bare text nodes; text nodes
        # may carry no attrs at all, so look the attribute up defensively.
        attrs = getattr(element, 'attrs', None)
        if not attrs:
            continue
        classes = attrs.get('class')
        if classes:
            print(classes[0])
答案 0 :(得分:0)
我终于弄清楚了。行的子节点中有一些是没有 attrs 的 NavigableString,因此 element.attrs['class'][0] 会引发错误。
现在可以正常运行了。我需要用 isinstance 检查元素是否为 Tag,如果不是就跳过它。总之,对于任何有兴趣的人,我的代码都在下面。
import urllib2
import datetime
import re
from bs4 import BeautifulSoup
from bs4 import Tag
# Report the time/result cell of a home baseball game on a given date.
# The script downloads the Texas Sports schedule page, keeps the home-game
# rows whose date cell matches, and prints the text of the cell whose first
# CSS class is classForTime (or "none" when nothing matches).

# Today's date as M/DD/YYYY (month unpadded, day zero-padded).  Built by hand
# because the '%-m' strftime flag is a platform extension and is not portable.
today = datetime.date.today()
date = '{0}/{1:02d}/{2}'.format(today.month, today.day, today.year)
# Regex alternation for the weekday abbreviations Mon. through Fri.
# (raw string so the backslashes reach the regex engine untouched).
validDay = r"Mon\.|Tue\.|Wed\.|Thu(r)?(s)?\.|Fri\."
website = "http://www.texassports.com/schedule.aspx?path=baseball"
opener = urllib2.build_opener()
# add headers that make it look like I'm a browser
opener.addheaders = [('User-Agent', 'Mozilla/5.0')]
page = opener.open(website)
try:
    # turn page into html object
    soup = BeautifulSoup(page, 'html.parser')
finally:
    # the response is fully parsed at this point; release the connection
    page.close()
# print(soup.prettify())
# get all home games
all_rows = soup.find_all('tr', class_='schedule_home_tr')
# see if any game is today
# To match the real date instead of the test value, use:
#   re.compile('.*({}).*'.format(date))
# hard coding for testing weekend
datePattern = re.compile('3/14/2017')
entryForToday = [t for t in all_rows if t.findAll('nobr', text=datePattern)]
classForTime = "schedule_dgrd_time/result"
timeOfGame = "none"
# NOTE(review): the original statement was cut off after "t.findAll('td',".
# Filtering the rows on validDay (which is otherwise unused) is the presumed
# intent -- confirm against the original script.
weekdayPattern = re.compile(validDay)
entryForToday = [t for t in entryForToday
                 if t.findAll('td', text=weekdayPattern)]
for elements in entryForToday:
    for element in elements:
        # Row children include bare text nodes (NavigableString), which have
        # no attributes; only real tags can carry the class we want.
        if not isinstance(element, Tag):
            continue
        # A tag without a class attribute must be skipped, not crash the scan.
        classes = element.attrs.get('class')
        if classes and classes[0] == classForTime:
            timeOfGame = element.text
# NOTE(review): original indentation was lost; printing once at the end
# (so "none" is reported when no game matches) is assumed -- confirm.
print(timeOfGame)