请帮助修复脚本。
import pprint
import requests
import bs4
def get_catalog(url):
    """Download *url* and return the catalog menu list element.

    The page is fetched with ``requests`` and parsed with Beautiful Soup.
    The function looks for ``<section class="catalog">`` and, inside it,
    ``<ul class="topnav">``.

    Returns the ``<ul>`` tag, or ``None`` when the HTTP status is not OK
    or when either element is missing from the page.
    """
    # A timeout keeps the script from hanging forever on an unresponsive host.
    req = requests.get(url, timeout=10)
    if req.status_code != requests.codes.ok:
        print('Error: ', req.status_code)
        return None

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = bs4.BeautifulSoup(req.text, 'html.parser')
    catalogMenu = soup.find('section', {'class': 'catalog'})
    if catalogMenu is None:
        # No catalog section on the page: report "not found" to the caller
        # instead of failing with AttributeError on the next lookup.
        return None
    return catalogMenu.find('ul', {'class': 'topnav'})
def parse_catalog_categories(catalogMenuList):
    """Print and return the first-level ``<li>`` items of the catalog menu.

    Only ``<li>`` elements that are direct children of *catalogMenuList*
    are collected; ``<li>`` elements nested deeper in sub-menus are not
    matched.

    Returns the list of matched items (also pretty-printed to stdout).
    """
    # recursive=False limits the search to direct children of the tag,
    # so nested sub-menu entries are skipped.
    items = catalogMenuList.find_all('li', recursive=False)
    pprint.pprint(items)
    return items
if __name__ == "__main__":
    # Fetch the catalog menu from the store page and hand it to the
    # category parser; report an error when no menu was obtained.
    store_url = 'http://first-store.ru/'
    menu = get_catalog(store_url)
    if menu:
        parse_catalog_categories(menu)
    else:
        print('Get catalog error')
问题是:我不知道如何只获取第一层嵌套的所有 li 子元素。即:iphone, ipad, ipod, imac, etc...
但不是:
iphone, iphone 5s, iphone 5s VIP, iphone 5c, .....
最佳答案
尝试设置 recursive=False,这样就只会在该标签的直接子元素中搜索:
items = catalogMenuList.find_all('li', recursive=False)
关于python - 如何找到第一层的后代?,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/22016490/