Python爬取B站排行榜视频信息

和上一篇相比，差别不是很大
 import xlrd#读取excel

 import xlwt#写入excel

 import requests

 import linecache

 import wordcloud

 import jieba

 import matplotlib.pyplot as plt

 from bs4 import BeautifulSoup

 if __name__ == "__main__":
     # Script entry point: fetch the Bilibili ranking page, write one row per
     # ranked video (ID, title, id taken from the link, and the two
     # "data-box" counters) to demo1.xls, then show a word cloud built from
     # all of the video titles.

     # Column headers for the first spreadsheet row.
     row0 = [u'ID', u'name', u'av', u'play_num', u'comment_num']

     f = xlwt.Workbook(encoding='utf-8')  # create the workbook
     sheet1 = f.add_sheet(u'sheet1')  # create the sheet
     for col, heading in enumerate(row0):
         sheet1.write(0, col, heading)

     # NOTE(review): the URL and the CSS class names used below depend on the
     # page layout served by the site -- confirm they still match.
     target = 'https://www.bilibili.com/ranking/all/160/0/3'  # Bilibili ranking page
     user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'
     headers = {'User-Agent': user_agent}

     # Send the browser-like User-Agent with the request (it was previously
     # built but never passed), bound the wait with a timeout, and fail
     # loudly on an HTTP error instead of parsing an error page.
     req = requests.get(url=target, headers=headers, timeout=10)
     req.raise_for_status()

     # Replace line-break tags with spaces and turn self-closing tags into
     # plain opening tags before handing the markup to the parser.
     html = req.text.replace('<br>', ' ').replace('<br/>', ' ').replace('/>', '>')
     bf = BeautifulSoup(html, "html.parser")

     texts = bf.find('ul', class_='rank-list')
     if texts is None:
         # find() returns None when nothing matches; stop with a readable
         # message rather than an AttributeError on the next call.
         raise SystemExit("rank-list not found in the response from " + target)

     titles = []  # every video title, later joined into the word-cloud text
     # n doubles as the ID column value and the spreadsheet row index
     # (row 0 holds the headers, so entries start at row 1).
     for n, item in enumerate(texts.find_all('div', class_='info'), start=1):
         link = item.find('a')
         item_name = link.text  # title
         titles.append(str(item_name))

         # Keep only the part of the link after its last '/'.
         item_href = link['href']
         item_href = item_href[item_href.rfind('/') + 1:]

         # The first two "data-box" spans go into the play_num and
         # comment_num columns respectively.
         item_refer = item.find_all('span', class_='data-box')
         mid = [n, item_name, item_href, item_refer[0].text, item_refer[1].text]

         for col, value in enumerate(mid):  # write the row to the sheet
             sheet1.write(n, col, value)

     f.save('demo1.xls')  # save the workbook

     # With no titles there is nothing for the word cloud to draw, so the
     # plotting step is skipped in that case.
     if titles:
         # Segment the text with jieba and re-join with spaces: wordcloud
         # cannot produce a correct Chinese word cloud from unsegmented text.
         cut_text = " ".join(jieba.cut("".join(titles)))

         wc = wordcloud.WordCloud(
             # A font must be set, otherwise the characters render as empty
             # boxes. This is the usual Windows font location; replace it
             # with another font path if needed.
             font_path="C:/Windows/Fonts/simfang.ttf",
             # Background colour and canvas size.
             background_color="white",
             width=1000,
             height=880,
         ).generate(cut_text)

         plt.imshow(wc, interpolation="bilinear")
         plt.axis("off")
         plt.show()

     print("Done!")
nicoollas

Python爬取B站排行榜视频信息