#coding=gbk
import requests
from fake_useragent import UserAgent
from lxml import etree
import urllib
import re
import os
from urllib.parse import urljoin

# Regex applied to the page URL; group 1 (the text between "https://" and the
# first dot) is used as the download folder suffix, e.g. "wangxu" for
# https://wangxu.tuchong.com/23892889/
pattern = r'https://(.+?)\.(.*)\.com'

# XPath selecting the `src` attribute of every <img> inside an <article>.
IMG_XPATH = '//article//img/@src'

# Seconds to wait on any single HTTP request before giving up.
REQUEST_TIMEOUT = 30


def extract_name(url):
    """Return the folder-name part of *url* (group 1 of ``pattern``).

    Args:
        url: Page address, expected to look like
            ``https://<name>.<site>.com/...``.

    Returns:
        The text between ``https://`` and the first dot.

    Raises:
        ValueError: If *url* does not match ``pattern``.
    """
    match = re.search(pattern, url)
    if match is None:
        raise ValueError(
            "unsupported URL (expected https://<name>.<site>.com/...): "
            "{!r}".format(url)
        )
    return match.group(1)


def download_images(url):
    """Download every ``<article>`` image on the page at *url*.

    Images are written to ``图虫_<name>/图<N>.png`` relative to the current
    working directory, where ``<name>`` comes from ``extract_name`` and
    ``<N>`` counts from 1 in page order. A progress line is printed after
    each file is written.

    Args:
        url: Address of the page to scrape.

    Returns:
        The number of images saved.

    Raises:
        ValueError: If *url* does not match ``pattern``.
        requests.RequestException: On a network failure or HTTP error status.
    """
    # Validate the URL before doing any network work.
    name = extract_name(url)
    headers = {'User-Agent': UserAgent().chrome}

    page = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    img_urls = etree.HTML(page.text).xpath(IMG_XPATH)
    if not img_urls:
        # Nothing to save, so do not leave an empty folder behind.
        return 0

    folder = '图虫_{}'.format(name)
    os.makedirs(folder, exist_ok=True)

    for num, img_url in enumerate(img_urls, start=1):
        # `src` may be relative or protocol-relative; resolve it against the
        # page URL (absolute URLs pass through unchanged).
        image = requests.get(
            urljoin(url, img_url), headers=headers, timeout=REQUEST_TIMEOUT
        )
        # Fail loudly rather than saving an error page as a .png file.
        image.raise_for_status()
        # Write the bytes already fetched instead of downloading a second time.
        with open(os.path.join(folder, '图{}.png'.format(num)), 'wb') as out:
            out.write(image.content)
        print("第{}张图片下载完毕".format(num))

    return len(img_urls)


if __name__ == '__main__':
    # Example input: https://wangxu.tuchong.com/23892889/
    download_images(input("请输入图虫网图片地址:"))

# Tuchong image scraper -- example URL: https://wangxu.tuchong.com/23892889/

 


# 2020-07-15

# 05-11 22:09