import os, re, sys, urllib2
from bs4 import BeautifulSoup
import lxml
def get_epg(channel, html):
soup = BeautifulSoup(html, "lxml")
main_div = soup.find("div", {"class":"viewport-container"})
elements = main_div.find_all("li")
for element in elements:
cmp = element.find("div", { "class" : "channel" } ).getText()
#return cmp
if channel == cmp:
print "found"
return element
EPG_URL = "http://www.hoerzu.de/tv-programm/jetzt/"
html = urllib2.urlopen(EPG_URL)
print get_epg("ZDF", html)
结果:
Traceback (most recent call last):
File "epg.py", line 17, in <module>
print get_epg("ZDF", html)
File "epg.py", line 10, in get_epg
cmp = element.find("div", { "class" : "channel" } ).getText()
AttributeError: 'NoneType' object has no attribute 'getText'
我真的不知道这里有什么问题,因为当我这样做时:
for element in elements:
cmp = element.find("div", { "class" : "channel" } ).getText()
return cmp
错误没有显示出来,一切都按预期进行......
答案 0 :(得分:3)
第二次迭代显然返回None。
<div class="channel">Das Erste</div>
None
None
None
<div class="channel">ZDF</div>
None
None
None
<div class="channel">RTL</div>
None
None
None
<div class="channel">Sat.1</div>
None
None
None
<div class="channel">ProSieben</div>
None
None
None
<div class="channel">kabel eins</div>
None
None
None
<div class="channel">RTL II</div>
None
None
None
<div class="channel">VOX</div>
None
None
None
<div class="channel">Arte</div>
None
None
None
<div class="channel">3sat</div>
None
None
None
<div class="channel">Super RTL</div>
None
None
None
<div class="channel">KiKA</div>
None
None
None
<div class="channel">NDR</div>
None
None
None
<div class="channel">WDR</div>
None
None
None
<div class="channel">MDR</div>
None
None
None
<div class="channel">BR</div>
None
None
None
<div class="channel">SWR</div>
None
None
None
<div class="channel">HR</div>
None
None
None
<div class="channel">RBB</div>
None
None
None
<div class="channel">n-tv</div>
None
None
None
<div class="channel">N24</div>
None
None
None
<div class="channel">Servus TV</div>
None
None
None
<div class="channel">SPORT1</div>
None
None
None
<div class="channel">TV.Berlin</div>
None
None
None
<div class="channel">Hamburg 1</div>
None
None
None
<div class="channel">Eurosport</div>
None
None
None
<div class="channel">München TV</div>
None
None
None
<div class="channel">Franken Fernsehen</div>
None
None
None
<div class="channel">Tele 5</div>
None
None
None
<div class="channel">Das VIERTE</div>
None
None
None
<div class="channel">NRW TV</div>
None
None
None
<div class="channel">Nickelodeon / Comedy Central</div>
None
None
None
所以你必须检查这个条件,而不是盲目地调用getText()。
答案 1 :(得分:-1)
来自bs4 import BeautifulSoup
你应该使用“main_div.findAll”
表示Bs4: find_all ---&gt;的findAll