1、说明

用 IIS 以 Web 形式发布了本地文件夹,提供文件下载,并设置了访问权限;默认情况下只能在页面上逐个点击文件进行下载,Web 界面如下:

3、脚本

执行脚本即可批量下载文件:脚本会在当前目录下创建一个文件夹来保存下载的文件,下载完成后再把该文件夹压缩成 zip 包。

# -*- coding: utf-8 -*-
import requests
import re
import os
import zipfile
import urllib3
urllib3.disable_warnings()

# Which remote folder to download: <year>/<month>/<date>/<folder>/
year = 2019
month = 12
date = 18
folder = 'PH003'

# Site information
# NOTE(review): credentials are hard-coded here; consider loading them from
# the environment or a config file before sharing this script.
username = 'admin'
password = '*********'
# A list of string parts that get_name_url() joins into the URL prefix
# placed in front of every href found in the listing page.
domain = ['https://*****.com']
# Derived from `domain` so the host is defined in exactly one place and the
# listing URL cannot drift away from the per-file download URLs.
files_url = '%s/%s/%s/%s/%s/' % (domain[0], year, month, date, folder)
# Local folder that receives the downloads, and the archive built from it.
dir_name = '%s_%s_%s_%s' % (year, month, date, folder)
zip_dir_name = '%s.zip' % dir_name


def create_folder(path):
    """Create directory ``path`` (and any missing parents) if it is absent.

    Args:
        path: Directory path to create.

    Returns:
        True if the directory was created by this call, False if something
        already existed at ``path``.
    """
    try:
        # EAFP: attempt the creation directly instead of exists()-then-create,
        # so a directory that appears between the check and the call cannot
        # crash the script.
        os.makedirs(path)
    except FileExistsError:
        print("目录" + path + "已存在")
        return False
    print("目录" + path + "创建成功")
    return True


def get_html():
    """Fetch the directory-listing page and return its anchor snippets.

    Requests ``files_url`` using HTTP basic auth (``username`` / ``password``)
    with TLS certificate verification turned off, decodes the body as UTF-8
    and extracts every ``<A HREF="...">...</A>`` fragment.

    Returns:
        list[str]: The matched anchor fragments, in page order.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status (e.g. wrong credentials or a non-existent folder).
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    # A timeout keeps the script from hanging forever on a dead server.
    r = requests.get(files_url, auth=(username, password), verify=False,
                     timeout=30)
    # Fail loudly instead of parsing an error page into an empty file list.
    r.raise_for_status()
    html = r.content.decode('utf-8')

    # re.S lets an anchor span several lines of the page source.
    anchor_re = re.compile(r'<A HREF=".*?">.*?</A>', re.S)
    return anchor_re.findall(html)


# One listing entry: group 1 is the href, group 2 is the link text (file name).
# Compiled once at import time rather than on every loop iteration.
_ENTRY_RE = re.compile(r'<A HREF="(.*?)">(.*?)</A>')
# Link text of the "go to parent directory" entry, which is not a file.
_PARENT_LINK_TEXT = "[转到父目录]"


def get_name_url():
    """Return ``[file_name, download_url]`` pairs for the listing page.

    Each anchor fragment returned by ``get_html()`` is split into its href
    and its link text; the href is prefixed with the concatenated parts of
    the module-level ``domain`` list to form the download URL. The
    parent-directory link is skipped.

    Returns:
        list[list[str]]: ``[name, url]`` pairs in page order.
    """
    prefix = "".join(domain)
    name_url = []
    for block in get_html():
        match = _ENTRY_RE.search(block)
        if match is None:
            # Fragment whose href or name spans a line break: not a usable
            # entry, so it is skipped.
            continue
        href, name = match.groups()
        if name == _PARENT_LINK_TEXT:
            continue
        name_url.append([name, prefix + href])
    return name_url


def download_files(dir_name):
    """Download every file of the remote listing into ``dir_name``.

    Creates ``dir_name`` if needed, then fetches each ``[name, url]`` pair
    from ``get_name_url()`` with HTTP basic auth and writes the response body
    to ``dir_name/name``.

    Args:
        dir_name: Local directory that receives the downloaded files.

    Raises:
        requests.exceptions.HTTPError: If a download answers with a 4xx/5xx
            status; aborting avoids saving an error page as file content.
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    create_folder(dir_name)
    for name, url in get_name_url():
        # Announce the file first so a failure can be attributed to it.
        print("正在下载:" + name)
        r = requests.get(url, auth=(username, password), verify=False,
                         timeout=60)
        r.raise_for_status()
        # os.path.join instead of a literal backslash: on non-Windows systems
        # "dir\\name" would be a single file name outside the target folder.
        with open(os.path.join(dir_name, name), "wb") as out:
            out.write(r.content)
    print("下载完成,开始压缩")


def zip_files(dir_name, zip_dir_name):
    """Compress the contents of ``dir_name`` into the archive ``zip_dir_name``.

    Files are stored under their path relative to ``dir_name``, so the
    archive does not contain the top-level folder itself.

    Args:
        dir_name: Directory whose files (recursively) are archived.
        zip_dir_name: Path of the zip file to create (overwritten if present).
    """
    # The context manager closes the archive even if a write fails.
    with zipfile.ZipFile(zip_dir_name, 'w', zipfile.ZIP_DEFLATED) as archive:
        for dirpath, _dirnames, filenames in os.walk(dir_name):
            for filename in filenames:
                full_path = os.path.join(dirpath, filename)
                # relpath strips only the leading dir_name; str.replace would
                # also remove later occurrences of the same text in the path.
                archive.write(full_path, os.path.relpath(full_path, dir_name))
    # Report success only after the archive has been finalised and closed.
    print('压缩成功')


def main():
    """Download the configured remote folder, then zip the local copy."""
    download_files(dir_name)
    zip_files(dir_name, zip_dir_name)


# Run only when executed as a script, so that importing this module (e.g. to
# reuse zip_files) does not start network downloads as a side effect.
if __name__ == '__main__':
    main()

执行结果如下:

12-18 08:09
查看更多