import time
import asyncio
import aiohttp
from lxml import etree
# Wall-clock start for timing the async crawl (elapsed time printed at the bottom).
start_time = time.time()
async def get_url(url):
    """Fetch one Lianjia listing page and print each listing's title text.

    Parameters
    ----------
    url : str
        Full URL of the results page to fetch.
    """
    # Context managers guarantee the session and response are closed even if
    # the request or parsing raises — the original leaked both.
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            result = await response.text()
    terr = etree.HTML(result)
    # Each <li> under the #content listing <ul> is one property card.
    ret = terr.xpath('//*[@id="content"]/div[1]/ul/li')
    for li in ret:
        title = li.xpath('.//div[@class="title"]//text()')
        print(title)
async def get_html():
    """Fetch a single page via ``get_url`` and print its return value.

    NOTE(review): never called in this script; relies on a module-level
    ``url`` binding (only set under ``__main__``).
    """
    print(await get_url(url))
if __name__ == '__main__':
    url = "https://sz.lianjia.com/ershoufang/pg{}"

    async def _crawl():
        # One coroutine per results page (pages 1-29), all fetched concurrently.
        await asyncio.gather(*(get_url(url.format(page)) for page in range(1, 30)))

    # asyncio.run() creates, runs, and closes the event loop for us.  The
    # original get_event_loop()/run_until_complete pair never closed the loop,
    # and ensure_future() outside a running loop is deprecated.
    asyncio.run(_crawl())
    end_time = time.time()
    print("执行时间{}".format(end_time - start_time))  # observed: ~6.24 s for 29 pages
# --- Synchronous (requests-based) version of the same crawl, for timing comparison ---
import time
import requests
from lxml import etree
# Browser-like User-Agent so the site serves the normal listing page.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'
}
# Restart the wall-clock timer for the synchronous run.
start_time = time.time()
def get_url():
    """Sequentially fetch Lianjia listing pages 1-29 and print each listing's title."""
    url = "https://sz.lianjia.com/ershoufang/pg{}"
    # One requests.Session reuses the underlying TCP connection across all 29
    # requests (keep-alive) instead of opening a fresh connection per page,
    # and is closed deterministically by the with-block.
    with requests.Session() as session:
        session.headers.update(headers)
        for page in range(1, 30):
            result = session.get(url.format(page)).text
            terr = etree.HTML(result)
            # Each <li> under the #content listing <ul> is one property card.
            ret = terr.xpath('//*[@id="content"]/div[1]/ul/li')
            for li in ret:
                title = li.xpath('.//div[@class="title"]//text()')
                print(title)
# Run the synchronous crawl and report its elapsed wall-clock time.
get_url()
end_time = time.time()
print("执行时间{}".format(end_time - start_time))
# 执行时间82.57950687408447