Python 文件批量下载、图片批量下载、批量请求、爬虫

#====================================================================================

downloadFile.py

#====================================================================================

#!/usr/bin/env python
# -*- coding:utf-8 -*-
 
from gevent import monkey
monkey.patch_all()
from gevent.pool import Pool
import requests
import sys
import os

def download(url):
    """Fetch *url* and append the raw response body to ``respose.log``.

    The request is sent with a desktop Chrome User-Agent header and the
    body is streamed in 1 KiB chunks, so the whole response is never held
    in memory at once.  A confirmation line is printed once the body has
    been written.

    :param url: address to request; surrounding whitespace is stripped.
    """
    chrome = ('Mozilla/5.0 (X11; Linux i86_64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36')
    headers = {'User-Agent': chrome}
    # NOTE(review): every call appends to the same file, so callers running
    # concurrently may interleave their chunks.  Deriving a per-URL name,
    # e.g. url.split('/')[-1].strip(), would avoid that -- confirm intent.
    filename = "respose.log"
    r = requests.get(url.strip(), headers=headers, stream=True)
    try:
        # iter_content() yields bytes, so the file must be opened in binary
        # mode; text mode mangles the data on Windows and is a TypeError on
        # Python 3.
        with open(filename, 'ab') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # skip empty keep-alive chunks
                    f.write(chunk)
    finally:
        # With stream=True the connection stays checked out until the
        # response is closed; release it even if writing failed.
        r.close()
    # Single-argument form prints the same text on Python 2 and Python 3.
    print("%s is ok" % filename)

def removeLine(key, filename):
    """Delete, in place, every line of *filename* that matches *key*.

    The work is delegated to ``sed -i '/<key>/d' <filename>``, so *key* is
    interpreted as a sed regular expression and must not contain an
    unescaped ``/`` (it would end the sed address early).

    :param key: sed pattern selecting the lines to drop; an empty key is
        ignored and the file is left untouched.
    :param filename: path of the file to edit.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    import subprocess

    if not key:
        # An empty pattern would produce the sed script `//d`, which does
        # not mean "match this key"; there is nothing sensible to remove.
        return
    # Pass an argument list instead of a shell string: the key and the file
    # name reach sed verbatim, so spaces or shell metacharacters in either
    # can neither break the command nor inject another one.
    subprocess.call(['sed', '-i', '/%s/d' % key, filename])

if __name__ == "__main__":
    # Script entry point: expects exactly one argument, the path of a text
    # file holding one URL per line, and downloads them with at most four
    # concurrent greenlets.
    if len(sys.argv) == 2:
        filename = sys.argv[1]
        # Read the list up front and close the file straight away; blank
        # lines are dropped so no download is started for an empty URL.
        with open(filename, "r") as f:
            urls = [line.strip() for line in f if line.strip()]
        p = Pool(4)
        for url in urls:
            p.spawn(download, url)
        # Wait once, after everything has been queued.  Joining inside the
        # loop would block after each spawn and run the downloads one at a
        # time.
        p.join()
    else:
        print('Usage: python %s urls.txt' % sys.argv[0])

#====================================================================================

#====================================================================================

测试文件 url.txt

#===========================

http://download2.boulder.ibm.com/sar/CMA/RAA/075lj/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075ln/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075lt/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075m7/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075m9/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075mb/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075mf/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075mn/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075ms/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075mv/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075nd/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075nk/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075no/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075nr/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075ns/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075nu/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075ny/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075o0/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075o1/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075p8/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075px/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075py/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075pz/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075q1/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075q3/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075q5/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/075zm/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/0758i/0/
http://download2.boulder.ibm.com/sar/CMA/RAA/0759s/0/

#=================================================

运行结果

G:\test\appscan>python downloadFile.py url.txt

respose.log is ok
respose.log is ok
respose.log is ok
respose.log is ok
respose.log is ok
respose.log is ok
respose.log is ok
respose.log is ok
respose.log is ok

#================================

结果文件

Python 批量文件下载-LMLPHP

Python 批量文件下载-LMLPHP

05-11 23:03