请帮助修复脚本。

import pprint
import requests

import bs4


def get_catalog(url):
    """Download *url* and return the catalog's top-level ``<ul>`` menu.

    The page is searched for ``<section class="catalog">`` and, inside it,
    for ``<ul class="topnav">``.

    Args:
        url: Address of the page to fetch.

    Returns:
        The matching ``<ul>`` tag, or ``None`` when the request fails, the
        response status is not OK, or the expected markup is not present.
    """
    try:
        # A timeout keeps the script from hanging on an unresponsive host.
        req = requests.get(url, timeout=30)
    except requests.RequestException as err:
        print('Error: ', err)
        return None

    if req.status_code != requests.codes.ok:
        print('Error: ', req.status_code)
        return None

    # Name the parser explicitly so the resulting tree does not depend on
    # which optional parsers happen to be installed.
    soup = bs4.BeautifulSoup(req.text, 'html.parser')
    catalogMenu = soup.find('section', {'class': 'catalog'})
    if catalogMenu is None:
        # find() returns None when nothing matches; report "no catalog"
        # to the caller instead of raising AttributeError below.
        return None

    return catalogMenu.find('ul', {'class': 'topnav'})


def parse_catalog_categories(catalogMenuList):
    """Print and return the names of the first-level catalog categories.

    Only ``<li>`` elements that are direct children of *catalogMenuList*
    are considered, so entries of nested sub-menus are skipped.

    Args:
        catalogMenuList: The catalog ``<ul>`` tag returned by
            ``get_catalog``; ``None`` is treated as an empty catalog.

    Returns:
        A list with one string per first-level ``<li>``.
    """
    if catalogMenuList is None:
        return []

    # recursive=False restricts the search to direct children; the default
    # would also return every <li> of the nested sub-menus.
    items = catalogMenuList.find_all('li', recursive=False)

    # The first non-blank string inside each <li> is taken as its label.
    # NOTE(review): assumes the category label precedes the nested
    # sub-menu in the markup - confirm against the real page.
    catalogNames = [next(iter(item.stripped_strings), '') for item in items]

    pprint.pprint(catalogNames)
    return catalogNames


if __name__ == "__main__":
    # Script entry point: fetch the store's catalog menu and, when one was
    # obtained, hand it to the category parser; otherwise report failure.
    menu = get_catalog('http://first-store.ru/')
    if menu:
        parse_catalog_categories(menu)
    else:
        print('Get catalog error')


问题是我无法只获取第一层嵌套的 li 元素(即直接子元素),而不包含更深层的后代。也就是说,我想要的是:

iphone, ipad, ipod, imac, etc...


但不是:

iphone, iphone 5s, iphone 5s VIP, iphone 5c, .....

最佳答案

尝试设置 recursive=False,这样只会在该标签的直接子节点中搜索:

# recursive=False limits the search to direct children of catalogMenuList,
# so <li> elements nested inside deeper sub-lists are not returned.
items = catalogMenuList.find_all('li', recursive=False)

关于“python - 如何找到第一层的后代?”,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/22016490/

10-10 01:12