这个脚本只抓取搜索结果的第一页,没有逐页爬取;想多爬几页的可以自己改一下,我上一篇博客里有翻页的例子。
直接上代码吧:
# -*- coding:utf-8 -*-
import os
import re

import requests
class Baidutupian(object):
    """Download the images listed on one Baidu image-search result page.

    Only a single result page is requested; there is no pagination.
    The code is written so that it runs on both Python 2 and Python 3.
    """

    # Seconds to wait for a single HTTP response before giving up.
    timeout = 10
    # Prefix put in front of every saved file name (directory plus "lpl").
    save_prefix = u"D:\\图片\\哈哈\\lpl"
    # Characters that may not appear in a Windows file name.
    _unsafe_chars = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

    def __init__(self, Leixing_name):
        """Store the search keyword and the HTTP settings.

        Args:
            Leixing_name: Keyword (image category) to search for. It is
                converted with ``str()``.
        """
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                          "AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/67.0.3396.99 Safari/537.36"
        }
        self.base_url = "http://image.baidu.com/"
        self.Leixing_name = str(Leixing_name)

    def send_request(self, url, params=None):
        """Send a GET request and return the raw response body.

        Args:
            url: Address to fetch.
            params: Optional dict of query parameters; ``requests`` encodes
                them and appends them to ``url``.

        Returns:
            The response body as bytes, or ``None`` when the request failed
            (network error, timeout or HTTP error status).
        """
        try:
            response = requests.get(url, params=params, headers=self.headers,
                                    timeout=self.timeout)
            # Treat 4xx/5xx as failures so error pages are not saved as images.
            response.raise_for_status()
        except Exception as err:
            # Best effort: one broken link must not abort the whole crawl.
            print(err)
            return None
        return response.content

    @classmethod
    def _filename_from_url(cls, link):
        """Build a file-system-safe name from the last 10 characters of link."""
        return cls._unsafe_chars.sub("_", link[-10:])

    def load_page(self, html):
        """Extract every ``objURL`` link from a result page and download it.

        Args:
            html: Page body as returned by ``send_request``. ``None`` or an
                empty body is ignored.
        """
        if not html:
            return
        # On Python 3 the body is bytes while the pattern is text; decode it.
        # On Python 2 ``bytes is str``, so the body is left untouched.
        if isinstance(html, bytes) and not isinstance(html, str):
            html = html.decode("utf-8", "replace")
        pattern = r'"objURL":"(.*?)",'
        for link in re.findall(pattern, html, re.S):
            print(link)
            data = self.send_request(link)
            if not data:
                # Download failed or returned nothing; skip this image.
                continue
            self.write_image(data, self._filename_from_url(link))

    def write_image(self, data, filename):
        """Write the image bytes to ``save_prefix + filename``.

        The target directory is created first when it does not exist.

        Args:
            data: Raw image bytes.
            filename: File name appended to ``save_prefix``.
        """
        print("[INFO]: 正在下载%s..." % filename)
        path = self.save_prefix + filename
        directory = os.path.dirname(path)
        if directory and not os.path.isdir(directory):
            os.makedirs(directory)
        with open(path, "wb") as f:
            f.write(data)

    def start_work(self):
        """Fetch the search result page for the keyword and download its images."""
        # rstrip avoids the "//search" double slash; the keyword goes through
        # ``params`` so that requests URL-encodes it.
        url = (self.base_url.rstrip("/") +
               "/search/flip?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1"
               "&fm=result&fr=&sf=1&fmq=1543200380146_R&pv=&ic=0&nc=1&z=&se=1&"
               "showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&ctd=154"
               "3200380146%5E00_1291X734")
        html = self.send_request(url, params={"word": self.Leixing_name})
        self.load_page(html)
# Script entry point: ask for a keyword and download the matching images.
if __name__ == "__main__":
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3 renamed raw_input to input
    Leixing_name = read_line("请输入需要爬取的图片类型:").strip()
    # An empty keyword would only produce a meaningless search; do nothing.
    if Leixing_name:
        Tupian = Baidutupian(Leixing_name)
        Tupian.start_work()
也不知道爬点啥,所以就又爬美女图片了,嘻嘻!