学习任务
获取去哪儿网的出发地列表
获取旅游景点列表
获取景点产品列表
存储数据
1 获取出发地站点
(1)访问touch.qunar.com
(2)按F12,单击自由行,在自由行页面点击搜索框
(3)单击任意一个城市,切换到headers,查看request URL如下所示。但是需要用工具对URL编码进行还原(解码),咱们才能知道其中的内容(dep参数表示出发地,query表示目的地)。推荐网站http://www.jsons.cn/urlencode/,解码效果见下面图2。
3 实现
(1)首先获得出发地站点,因为最终需要获得整个自由行的产品列表。
在自由行首页中点击左侧的出发地站点,然后获取目标URL,如图二所示。
import requests

# Endpoint that lists the departure cities.
url = "https://touch.dujia.qunar.com/depCities.qunar"

# A timeout keeps the demo from hanging forever if the server never answers.
strhtml = requests.get(url, timeout=10)
print(strhtml)

# Decode the JSON body; the loops below read it as a dict whose 'data' entry
# maps each group key to an iterable of departure city names.
dep_dict = strhtml.json()
print(dep_dict)

for dep_group in dep_dict['data'].values():
    for dep in dep_group:
        print(dep)
(2)获得目的地。根据上面的分析,json工具解码以后通过拼接可得URL。
import urllib.parse

# `dep` is one departure city name from step (1); it is percent-encoded
# before being placed in the query string.
url = 'https://m.dujia.qunar.com/golfz/sight/arriveRecommend?dep={}&exclude=&extensionImg=255,175'.format(urllib.parse.quote(dep))
(3)总源码
import time
import urllib.parse

import requests

# import pymongo

# Optional MongoDB storage: uncomment these lines together with the
# insert_one() call in get_list() to store each page instead of only
# printing it.
# client = pymongo.MongoClient('localhost', 27017)
# book_qunar = client['qunar']
# sheet_qunar_zyx = book_qunar['qunar_zyx']

# Endpoint that lists the departure cities.
DEP_CITIES_URL = 'https://touch.dujia.qunar.com/depCities.qunar'

# Recommended destinations for one departure city (dep = departure).
ARRIVE_URL = (
    'https://m.dujia.qunar.com/golfz/sight/arriveRecommend'
    '?dep={}&exclude=&extensionImg=255,175'
)

# Product list for a (departure, destination) pair. Placeholders, in order:
# dep, query, originalquery, result offset, page size.
LIST_URL = (
    'https://touch.dujia.qunar.com/list?modules=list,bookingInfo'
    '&dep={}&query={}&mtype=all&ddt=false'
    '&mobFunction=%E6%89%A9%E5%B1%95%E8%87%AA%E7%94%B1%E8%A1%8C'
    '&cfrom=zyx&it=FreetripTouchin&et=FreetripTouch&date=&configDepNew='
    '&needNoResult=true&originalquery={}&limit={},{}&includeAD=true'
    '&qsact=search'
)

# Number of products requested per page.
PAGE_SIZE = 20


def _list_url(dep, item, offset):
    """Build the product-list URL for *dep* -> *item* starting at *offset*."""
    quoted_item = urllib.parse.quote(item)
    return LIST_URL.format(
        urllib.parse.quote(dep), quoted_item, quoted_item, offset, PAGE_SIZE
    )


def get_json(url, timeout=10):
    """Fetch *url* with a GET request and return its decoded JSON body.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Whatever ``Response.json()`` decodes from the body.

    Raises:
        requests.exceptions.RequestException: on connection errors or timeout.
        ValueError: if the body is not valid JSON.
    """
    response = requests.get(url, timeout=timeout)
    # Pause one second after every request (presumably to throttle the crawl).
    time.sleep(1)
    return response.json()


# Fetch the product list.
def get_list(dep, item):
    """Print every result page of products for departure *dep* and destination *item*.

    The first page is requested to read ``data.limit.routeCount``; if that
    field is missing or not an integer the function returns without output.
    Otherwise one record per page is printed, holding today's date, the
    departure, the destination, the page offset and the raw response.

    Args:
        dep: Departure city name.
        item: Destination query string.
    """
    first_page = get_json(_list_url(dep, item, 0))
    try:
        route_count = int(first_page['data']['limit']['routeCount'])
    except (KeyError, TypeError, ValueError):
        # No usable route count in the response: nothing to page through.
        return

    for limit in range(0, route_count, PAGE_SIZE):
        # Offset 0 is the page already downloaded above; reuse it instead of
        # requesting the identical URL a second time.
        page = first_page if limit == 0 else get_json(_list_url(dep, item, limit))
        result = {
            'date': time.strftime('%Y-%m-%d'),
            'dep': dep,
            'arrive': item,
            'limit': limit,
            'result': page,
        }
        # sheet_qunar_zyx.insert_one(result)
        print(result)


# def connect_mongo():
#     client = pymongo.MongoClient('localhost', 27017)
#     book_qunar = client['qunar']
#     return book_qunar['qunar_zyx']


def _get_destinations(dep):
    """Return the de-duplicated destination queries recommended for *dep*.

    First-seen order is kept. Missing or empty 'data', 'subModules' and
    'items' sections are treated as empty.
    """
    arrive_dict = get_json(ARRIVE_URL.format(urllib.parse.quote(dep)))
    # A dict serves as an insertion-ordered set.
    # NOTE(review): assumes each 'query' value is hashable (a string) - confirm.
    destinations = {}
    for module in arrive_dict.get('data') or []:
        for sub_module in module.get('subModules') or []:
            for entry in sub_module.get('items') or []:
                destinations[entry['query']] = None
    return list(destinations)


def main():
    """Crawl the product lists for every departure/destination pair."""
    dep_dict = get_json(DEP_CITIES_URL)
    # The response is JSON; 'data' holds one more nesting level, mapping each
    # group key to the departure cities in that group.
    for dep_group in dep_dict['data'].values():
        for dep in dep_group:
            # Per the decoded URL, `dep` is the departure while `query` and
            # `originalquery` carry the destination.
            for item in _get_destinations(dep):
                get_list(dep, item)


if __name__ == "__main__":
    main()