import requests
from bs4 import BeautifulSoup

# Drug-listing index; pages are BASE + page number (1-based).
URL = 'http://shuju.3156.cn/gcyp/index-page-'
# Alternate data source kept for reference:
# "http://app1.sfda.gov.cn/datasearchcnda/face3/base.jsp?tableId=25&tableName=TABLE25&title=%B9%FA%B2%FA%D2%A9%C6%B7&bcId=152904713761213296322795806604"


def getHTML(url):
    """Fetch *url* and return the raw (undecoded) response body as bytes.

    The site serves GB-family encoded pages; decoding is deliberately left
    to the HTML parser (see parseHTML), so the bytes are returned as-is.
    """
    # BUG FIX: the original set r.encoding = 'gbk2312', which is not a valid
    # codec name ('gb2312'/'gbk' are).  It was harmless only because
    # r.content (bytes) was returned and r.text never touched — dropped.
    # A timeout is added so a stalled page cannot hang the whole crawl.
    r = requests.get(url, timeout=10)
    return r.content


def parseHTML(html):
    """Print the text of every <td> cell in the first <table> of *html*.

    Pages without a table are skipped quietly (the original raised
    IndexError on tables[0] in that case).
    """
    # BUG FIX: bs4 3.x keyword fromEncoding -> from_encoding (modern
    # BeautifulSoup rejects the old spelling); gb18030 is a superset of
    # gb2312/gbk, so it decodes the site safely.  An explicit parser is
    # named to avoid the "no parser specified" warning.
    soup = BeautifulSoup(html, 'html.parser', from_encoding='gb18030')
    tables = soup.find_all('table')  # find_all: modern alias of findAll
    if not tables:
        return
    for tr in tables[0].find_all('tr'):
        for td in tr.find_all('td'):
            print(td.get_text())


if __name__ == '__main__':
    # Crawl index pages 1..999.  Guarded by __main__ so importing this
    # module no longer kicks off ~1000 HTTP requests as a side effect.
    for num in range(1, 1000):  # list(...) wrapper was unnecessary
        html = getHTML(URL + str(num))
        print("第" + str(num) + "页")
        parseHTML(html)
    # NOTE(review): the original ended with a stray debug print(html)
    # dumping the last page's raw bytes; removed as leftover debugging.
最近在研究 Python，初步尝试解决爬虫的问题
抓取页面地址:http://shuju.3156.cn/gcyp/index-page-1
主要抓取药品名称、生产企业、产品类别等内容。以上程序已经完成运行。