如何找到第一级的后代?

时间:2014-02-25 14:03:13

标签: python python-3.x beautifulsoup

请帮助修复脚本。

import pprint
import requests

import bs4


def get_catalog(url):
    """Download *url* and return the top-level catalog menu element.

    Returns the ``<ul class="topnav">`` element located inside
    ``<section class="catalog">``, or ``None`` when the HTTP request does
    not succeed or either element is missing from the page.
    """
    req = requests.get(url)
    if req.status_code != requests.codes.ok:
        print('Error: ', req.status_code)
        return None

    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers happen to be installed on the machine.
    soup = bs4.BeautifulSoup(req.text, 'html.parser')
    catalogMenu = soup.find('section', {'class': 'catalog'})
    if catalogMenu is None:
        # find() returns None when nothing matches; chaining .find() on it
        # would raise AttributeError instead of reporting "no catalog".
        return None

    return catalogMenu.find('ul', {'class': 'topnav'})


def parse_catalog_categories(catalogMenuList):
    """Print and return the first-level ``<li>`` items of the catalog menu.

    Only direct children of *catalogMenuList* are collected; ``<li>``
    elements nested inside sub-menus are skipped.

    Args:
        catalogMenuList: the menu element whose direct ``<li>`` children
            are wanted (anything exposing a bs4-style ``find_all``).

    Returns:
        The list of direct ``<li>`` children, in document order.
    """
    # recursive=False limits the search to direct children, so items that
    # belong to nested sub-menus are not returned.
    items = catalogMenuList.find_all('li', recursive=False)
    pprint.pprint(items)
    return items


if __name__ == "__main__":
    # Fetch the catalog menu from the store front page, then list its
    # categories; report a failure when no menu could be obtained.
    url = 'http://first-store.ru/'
    catalogMenuList = get_catalog(url)
    if catalogMenuList:
        parse_catalog_categories(catalogMenuList)
    else:
        print('Get catalog error')

问题在于:我无法只获取第一层嵌套的所有 li 元素(即直接子元素)。也就是说,我想要的是:

iphone, ipad, ipod, imac, etc... 

但不是:

iphone, iphone 5s, iphone 5s VIP, iphone 5c, .....

1 个答案:

答案 0 :(得分:4)

尝试设置 recursive=False,这样只会在该标签的直接子节点中搜索:

# recursive=False restricts the search to direct children of catalogMenuList.
items = catalogMenuList.find_all('li', recursive=False)