请帮助修复脚本。
import pprint
import requests
import bs4
def get_catalog(url):
    """Download *url* and return the top-level catalog menu element.

    The page is expected to contain ``<section class="catalog">`` with a
    ``<ul class="topnav">`` inside it.

    Args:
        url: Address of the page to fetch.

    Returns:
        The ``<ul class="topnav">`` element found inside the catalog
        section, or ``None`` when the HTTP status is not OK or either
        element is missing from the page.
    """
    req = requests.get(url)
    if req.status_code != requests.codes.ok:
        print('Error: ', req.status_code)
        return None

    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = bs4.BeautifulSoup(req.text, 'html.parser')

    catalog_section = soup.find('section', {'class': 'catalog'})
    if catalog_section is None:
        # find() returns None when nothing matches; chaining .find() on it
        # would raise AttributeError.
        return None

    return catalog_section.find('ul', {'class': 'topnav'})
def parse_catalog_categories(catalogMenuList):
    """Print and return the first-level ``<li>`` items of the catalog menu.

    Only direct children of *catalogMenuList* are selected, so entries of
    nested sub-menus are not included in the result.

    Args:
        catalogMenuList: The menu element to inspect; it must provide a
            ``find_all(name, recursive=...)`` method. ``None`` is accepted
            and treated as an empty menu.

    Returns:
        The list of direct-child ``<li>`` elements (empty when
        *catalogMenuList* is ``None``).
    """
    if catalogMenuList is None:
        return []

    # recursive=False restricts the search to direct children instead of
    # every descendant.
    items = catalogMenuList.find_all('li', recursive=False)
    pprint.pprint(items)
    return items
if __name__ == "__main__":
    # Script entry point: fetch the store front page, then list the
    # catalog categories, or report that the catalog menu was not obtained.
    url = 'http://first-store.ru/'
    catalogMenuList = get_catalog(url)
    if catalogMenuList:
        parse_catalog_categories(catalogMenuList)
    else:
        print('Get catalog error')
问题在于我无法只获取第一层嵌套的 li 元素
(即直接子元素),而不包含更深层的后代。即:
iphone, ipad, ipod, imac, etc...
但不是:
iphone, iphone 5s, iphone 5s VIP, iphone 5c, .....
答案 0 :(得分:4)
尝试设置 recursive=False,
这样只会在该标签的直接子节点中搜索:
items = catalogMenuList.find_all('li', recursive=False)