在循环中绕过 AttributeError:'NoneType' 对象没有属性 'findAll'

时间:2015-06-07 08:02:13

标签: python loops error-handling web-scraping

import requests
from bs4 import BeautifulSoup
import csv
from urlparse import urljoin
import urllib2


# Build the list of game-log URLs (`urlll`) to scrape, starting from the
# site's player index page and following the links found there.
base_url = 'http://www.baseball-reference.com'
player_url = 'http://www.baseball-reference.com/players/'
game_logs = 'http://www.baseball-reference.com/players/gl.cgi?id='
# Each season appears exactly once: a repeated entry would make the year
# filter below queue (and later download) the same URLs twice.
years = ['2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007',
         '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015']

data = requests.get("http://www.baseball-reference.com/players/")
soup = BeautifulSoup(data.content)

# Every href on the index page, prefixed with the site root.
index_links = [base_url + link['href']
               for link in soup.find_all('a')
               if link.has_attr('href')]

# Keep links under /players/ that are longer than 48 characters.
# NOTE(review): the 48-character threshold is carried over unchanged;
# presumably it drops the short index pages -- confirm against the site.
player_pages = [link for link in index_links
                if link.startswith(player_url) and len(link) > 48]

# Fetch each kept page and gather every href found on it.
page_links = []
for page_url in player_pages:
    data = requests.get(page_url)
    soup = BeautifulSoup(data.content)
    page_links.extend(base_url + link['href']
                      for link in soup.find_all('a')
                      if link.has_attr('href'))

# Only game-log links are of interest.
game_log_links = [link for link in page_links if game_logs in link]

# Final work list, grouped by season in the order given by `years`.
# A link is kept for a season when the year string occurs anywhere in it.
urlll = []
for year in years:
    urlll.extend(link for link in game_log_links if year in link)

# Download every game-log page and print the cell text of its
# 'batting_gamelogs' table as a list of rows (one list of cells per <tr>).
for j in urlll:
    response = requests.get(j)
    soup = BeautifulSoup(response.content)
    table = soup.find('table', attrs={'id': 'batting_gamelogs'})
    if table is None:
        # soup.find() returns None when the page has no such table; calling
        # findAll on it raises AttributeError, so skip the page and move on.
        continue
    list_of_rows = []
    for row in table.findAll('tr'):
        # NOTE(review): the character being stripped may be a non-breaking
        # space in the original page source -- confirm before relying on it.
        list_of_cells = [cell.text.replace(' ', '').encode("utf-8")
                         for cell in row.findAll('td')]
        list_of_rows.append(list_of_cells)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(list_of_rows)

当我遍历这些网址来抓取表格时,有些页面中并不存在该表格。我收到的错误如下:

Traceback (most recent call last):
  File "py5.py", line 55, in <module>
    list_of_cells.append(text)
AttributeError: 'NoneType' object has no attribute 'findAll'

即使页面中没有表格,有办法让循环继续执行吗?

1 个答案:

答案 0 :(得分:1)

使用 try 和 except 捕获并处理该错误:

 # Guard the call that actually fails: `table` is None when the page has no
 # matching table, and None has no findAll attribute.
 try:
     rows = table.findAll('tr')
 except AttributeError:
     # No table on this page: treat it as having no rows so the
     # surrounding loop carries on with the next URL.
     rows = []
 for row in rows:
     list_of_cells = []
     for cell in row.findAll('td'):
         text = cell.text.replace('&nbsp;', '').encode("utf-8")
         list_of_cells.append(text)
     list_of_rows.append(list_of_cells)