Python:爬取某页面中的图片

注意:Python 3 与 Python 2 存在一些差异(例如 urllib 的接口改为 urllib.request,read() 返回的是 bytes,需要先 decode 才能用字符串正则匹配),使用时需要多加留意。

#!/usr/bin/env python3
# coding=utf-8
# Scrape the images embedded in a Baidu Tieba thread page.
import re
import urllib.request

# Image tags on the page look like: src="<url>.jpg" pic_ext=...
# The capture group is the image URL. Compiled once at module level.
_IMG_PATTERN = re.compile(r'src="(.+?\.jpg)" pic_ext')


def get_html(url):
    """Fetch *url* and return the raw response body as bytes.

    The response object is closed before returning. Any exception raised
    by ``urllib.request.urlopen`` propagates to the caller.
    """
    with urllib.request.urlopen(url) as page:
        return page.read()


def get_img(htmlb):
    """Download every image referenced in *htmlb* into the current directory.

    Args:
        htmlb: Page source, either UTF-8 encoded bytes (as returned by
            ``get_html``) or an already-decoded ``str``.

    Returns:
        The list of file names written, in page order: ``0.jpg``,
        ``1.jpg``, ... An empty list when no image URL matches.
    """
    # In Python 3, read() returns bytes; decode before matching against
    # the str regex pattern.
    if isinstance(htmlb, (bytes, bytearray)):
        htmlb = htmlb.decode('utf-8')

    saved = []
    for index, imgurl in enumerate(_IMG_PATTERN.findall(htmlb)):
        filename = '%s.jpg' % index
        urllib.request.urlretrieve(imgurl, filename)
        saved.append(filename)
    return saved


if __name__ == "__main__":
    # Only hit the network when run as a script, not when imported.
    htmlb = get_html("http://tieba.baidu.com/p/2460150866")
    # Original author's note here: "a newline is needed at the end" --
    # presumably meaning the file should end with a newline.
    print(get_img(htmlb))
05-11 17:48