官方文档:https://docs.python.org/3/library/urllib.html
获取页面内容
第一种方式
# Fetch a page by passing the URL string directly to urlopen().
import urllib.request

url = 'https://www.baidu.com/'
# The response is a context manager; `with` closes the connection
# even if read()/decode() raises.
with urllib.request.urlopen(url) as r:
    print(r)  # e.g. <http.client.HTTPResponse object at 0x00000201E6C66CF8>
    # read() returns bytes; decode them to text before printing.
    print(r.read().decode('utf-8'))
另一种方式
# Fetch a page by wrapping the URL in a Request object first.
import urllib.request

url = 'http://www.cnblogs.com/0bug/'
req = urllib.request.Request(url)
# urlopen() accepts a Request object as well as a plain URL string.
with urllib.request.urlopen(req) as res:
    print(res.read().decode('utf-8'))
发送内容
# Send form data: supplying `data` makes urlopen() issue a POST.
import urllib.parse
import urllib.request

url = 'http://httpbin.org/post'
# `data` must be bytes, so the url-encoded form string is encoded first.
data = urllib.parse.urlencode({'name': 'lcg'}).encode('utf-8')
with urllib.request.urlopen(url, data=data) as r:
    print(r.read().decode('utf-8'))
设置超时时间
# Limit how long a blocking network operation may take.
import urllib.request

url = 'http://www.cnblogs.com/0bug/'
# `timeout` is given in seconds.
with urllib.request.urlopen(url, timeout=1) as r:
    print(r.read().decode('utf8'))
异常处理
# Detect a timed-out request and report it instead of crashing.
import socket
import urllib.error
import urllib.request

url = 'http://www.cnblogs.com/0bug/'
try:
    # A deliberately tiny timeout (seconds) to trigger the timeout path.
    with urllib.request.urlopen(url, timeout=0.01) as r:
        print(r.read().decode('utf8'))
except urllib.error.URLError as e:
    # A timeout while connecting is wrapped in URLError; the underlying
    # cause is exposed as e.reason. Other URLErrors are ignored here, as
    # in the original example.
    if isinstance(e.reason, socket.timeout):
        print('请求超时')
except socket.timeout:
    # A timeout while reading the body is raised directly, not wrapped
    # in URLError, so it needs its own handler.
    print('请求超时')
响应码、响应头
# Inspect the status code and headers of a response.
import urllib.request

url = 'http://www.cnblogs.com/0bug/'
with urllib.request.urlopen(url) as r:
    print(r.status)  # 200
    # getheaders() returns a list of (name, value) tuples.
    print(r.getheaders())  # [('Content-Type', 'text/html; charset=utf-8'), ...]
    # getheader() looks up a single header by name.
    print(r.getheader('Content-Type'))  # text/html; charset=utf-8
构造请求信息
# Build a full request: custom headers, a form body and an explicit method.
import urllib.parse
import urllib.request

url = 'http://www.cnblogs.com/0bug/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36',
    'Host': 'www.cnblogs.com',
}
dic = {'name': 'lcg'}
# The request body must be bytes, so encode the url-encoded form string.
data = urllib.parse.urlencode(dic).encode('utf-8')
req = urllib.request.Request(url=url, data=data, headers=headers, method='POST')
with urllib.request.urlopen(req) as res:
    print(res.read().decode('utf-8'))
另一种添加请求头的方式
# Alternative: attach headers one at a time after the Request is created.
import urllib.parse
import urllib.request

url = 'http://www.cnblogs.com/0bug/'
dic = {'name': 'lcg'}
# The request body must be bytes, so encode the url-encoded form string.
data = urllib.parse.urlencode(dic).encode('utf-8')
req = urllib.request.Request(url=url, data=data, method='POST')
# add_header(name, value) sets a single header on an existing Request.
req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/53...')
with urllib.request.urlopen(req) as res:
    print(res.read().decode('utf-8'))
代理
# Route requests through proxies by building a custom opener.
import urllib.request

url = 'http://www.cnblogs.com/0bug/'
# Maps each URL scheme to the proxy that should carry requests for it.
# NOTE(review): these are sample proxy addresses and are presumably no
# longer reachable; substitute a working proxy before running.
proxy_handler = urllib.request.ProxyHandler({
    'http': 'http://122.114.31.177:808',
    'https': 'https://124.133.75.183:8118',
})
opener = urllib.request.build_opener(proxy_handler)
# opener.open() is used like urlopen(), but goes through the handlers above.
with opener.open(url) as r:
    print(r.read().decode('utf-8'))