I recently joined a project that involved route planning: we needed to look up the shortest route distance and travel time between pairs of locations, so I decided to try the Baidu Maps API as a reference.

Environment:

  Python 3.6

Main points:

1. The official Baidu route-planning API requires latitude/longitude input, so the first step is Baidu geocoding: converting an address or place-name description into lat/lng coordinates, which can then be used for mapping or spatial analysis. (For national-security reasons, published coordinates are generally offset.) http://lbsyun.baidu.com/index.php?title=webapi/guide/webservice-geocoding
2. With origin and destination coordinates (lng, lat) in hand, the goal is to use the route-planning service of the Baidu Maps developer platform to get the planned distance and estimated duration between the two points. The usage notes are at http://lbsyun.baidu.com/index.php?title=webapi/direction-api-abroad
3. The crawl may be interrupted by server errors or rejected request parameters; make sure to handle this (see the retry sketch after this list).
4. Three practical notes: convert the source file to utf-8; adjust the file paths to your own; and apply for your own AK on the developer platform.
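The scripts below handle point 3 with a bare try/except plus continue. A more explicit alternative is a small retry helper with a fixed backoff; this is a minimal sketch rather than part of the original scripts, and fetch_json is a name I made up for illustration:

import json
import time
from urllib.request import urlopen

def fetch_json(url, retries=3, delay=3):
    """Fetch a URL and decode its JSON body, retrying on transient failures."""
    for attempt in range(retries):
        try:
            with urlopen(url, timeout=10) as resp:
                return json.loads(resp.read().decode())
        except Exception as e:  # server errors, timeouts, decode problems, ...
            print('request failed (%s), attempt %d/%d' % (e, attempt + 1, retries))
            time.sleep(delay)
    return None  # the caller decides what to do when every retry fails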

The code follows (the data files are not attached, so minor tweaks are needed before it runs):

# -*- coding:utf-8 -*-
# ------------------------------
# @Time   : 2019/5/9 13:32
# @Author : jonie
# @Email  :
# @File   : code_get.py
# Description: geocode addresses with the Baidu geocoding API
# ------------------------------
import csv
import json
from urllib.request import urlopen, quote

# [113.63095213159264, 34.74830559988335]#
origin_path = 'data/赛点.csv'       # path of the source data file
new_path = 'data/地址对应坐标.txt'  # path where the scraped coordinates are saved

machine_data = csv.reader(open(origin_path, 'r', encoding='utf-8'))  # read the source data
for addr in machine_data:  # geocode each record in turn
    # print(addr[2])
    address = addr[1]
    ak = 'FA8atAaqd1wajikD56lPqtiaNCldeya'
    url = 'http://api.map.baidu.com/geocoder/v2/?address='
    output = 'json'
    # ak = 'your ak'  # fill in the AK generated for your own application
    add = quote(address)  # the address is Chinese, so quote() it first to avoid mojibake
    url2 = url + add + '&output=' + output + "&ak=" + ak
    req = urlopen(url2)
    res = req.read().decode()
    temp = json.loads(res)
    lng = temp['result']['location']['lng']  # longitude
    lat = temp['result']['location']['lat']  # latitude
    lng = ("%.5f" % lng)
    lat = ("%.5f" % lat)
    list1 = [lng, lat, addr[0]]
    print('Baidu coordinates:', list1)
    with open(new_path, 'a', encoding='utf-8') as f:
        f.write(str(list1))
        f.write('\n')
    with open("data/赛点信息.csv", 'a', newline='', encoding='utf-8') as t:
        writer = csv.writer(t)   # create a csv writer
        writer.writerow(list1)   # write the row
        # writer.writerows(n)    # write multiple rows at once
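One caveat about the loop above: it indexes temp['result']['location'] without checking the returned status, so a single failed lookup (an unparseable address, an exhausted quota) raises a KeyError and kills the whole run. A minimal guarded variant, assuming the documented response layout of the v2 geocoder:

import json

def parse_location(res):
    """Return (lng, lat) strings from a geocoder response body, or None on failure."""
    temp = json.loads(res)
    if temp.get('status') != 0:  # any non-zero status means the lookup failed
        print('geocoding failed, status:', temp.get('status'), temp.get('msg'))
        return None
    loc = temp['result']['location']
    return ("%.5f" % loc['lng']), ("%.5f" % loc['lat'])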

Code that calls the Baidu Maps API to get the planned route distance and estimated duration between origin and destination:

import csv
import json
import time
from urllib.request import urlopen

# Source csv format: origin lat + origin lng + index + destination lat + destination lng
origin_path = 'data/b.csv'             # path of the source data file
result_path = 'data/result122901.txt'  # path where results are saved

# API endpoints provided by Baidu Maps
url_drive = r"http://api.map.baidu.com/direction/v2/driving"              # driving (direction API)
url_ride = r'http://api.map.baidu.com/routematrix/v2/riding?output=json'  # riding (routematrix batch routing)
url_walk = r'http://api.map.baidu.com/routematrix/v2/walking?output=json' # walking; note: ori1 below starts with '?', so the final walking URL carries two '?' and may need fixing
url_bus = r'http://api.map.baidu.com/direction/v2/transit?output=json'    # bus (direction API)
cod = r"&coord_type=bd09ll"
# Coordinate types: bd09ll (Baidu lat/lng); bd09mc (Baidu Mercator);
# gcj02 (national survey bureau coordinates); wgs84 (raw GPS coordinates)

# AKs are the keys applied for on the Baidu Maps site; when the quota runs out,
# simply append more AKs to the list
AK = ['FA8atAaqd1wajikD56lPqtiasdfleCeyz']

# Write the column names first (matching the rows written below)
colnames = 'device_sn origin destination status drive_m drive_s walk_m walk_s'
with open(result_path, 'a', encoding='utf-8') as f:
    f.write(colnames)
    f.write('\n')

address = csv.reader(open(origin_path, 'r', encoding='utf-8'))  # read the source data
# for ad in address:
#     print(ad[0], ad[1], ad[2], ad[3], ad[4])

n = 0     # finished records
akn1 = 0  # index of the AK used for driving
akn2 = 0  # index of the AK used for transit
while True:
    try:  # guard against file-encoding problems, server timeouts, etc.
        for ad in address:
            if (akn1 < len(AK)) and (akn2 < len(AK)):  # any quota left?
                mac_code = str(ad[2])  # device serial number
                try:
                    ori = str(ad[0]) + ',' + str(ad[1])  # origin
                    des = str(ad[3]) + ',' + str(ad[4])  # destination
                    ak_drive = AK[akn1]
                    ak_bus = AK[akn2]
                    ak_drive2 = r'&ak=' + ak_drive
                    ak_bus2 = r'&ak=' + ak_bus
                    ori1 = r"?origin=" + ori
                    des1 = r"&destination=" + des

                    # --- driving ---
                    tac_type = r'&tactics=11'  # 11 = default route
                    # 10 avoid highways; 11 default; 12 shorter distance;
                    # 13 shorter distance ignoring traffic (driving only)
                    aurl_drive = url_drive + ori1 + des1 + cod + tac_type + ak_drive2
                    res_drive = urlopen(aurl_drive)       # open the URL
                    cet_drive = res_drive.read()          # read the body
                    res_drive.close()
                    result_drive = json.loads(cet_drive)  # json -> dict
                    status = result_drive['status']
                    print('driving status code', status)
                    if status == 0:  # status 0: no error
                        m_drive = result_drive['result']["routes"][0]['distance']       # distance (m)
                        m_drive2 = float(m_drive)                                       # str -> float
                        timesec_drive = result_drive['result']["routes"][0]['duration'] # duration (s)
                        diss_drive = 'status' + str(status) + ' ' + str(m_drive) + ' ' + str(timesec_drive)
                    elif status == 302 or status == 210 or status == 201:
                        # 302: quota exhausted; 210: IP verification failed
                        m_drive2 = 10000  # > 5 km, so the walking query is skipped
                        akn1 += 1
                        diss_drive = 'status' + str(status) + ' break break'
                    else:
                        m_drive2 = 10000  # > 5 km, so the walking query is skipped
                        diss_drive = 'status' + str(status) + ' na na'

                    # --- walking ---
                    try:  # the comparison raises when m_drive2 is unset
                        if 0 < m_drive2 < 5000:  # only query walking when driving < 5 km
                            aurl_walk = url_walk + ori1 + des1 + cod + ak_drive2
                            res_walk = urlopen(aurl_walk)
                            cet_walk = res_walk.read()
                            result_walk = json.loads(cet_walk)
                            res_walk.close()
                            status_walk = result_walk['status']
                            if status_walk == 0:  # status OK
                                m_walk = result_walk['result']["routes"][0]['distance']    # walking distance
                                time_walk = result_walk['result']["routes"][0]['duration'] # walking time
                                diss_walk = str(m_walk) + ' ' + str(time_walk)
                            else:  # abnormal status
                                diss_walk = 'na na'
                        else:  # driving distance >= 5 km: skip walking
                            diss_walk = 'na na'
                    except:  # on error, walking columns also get na
                        diss_walk = 'na na'

                    # --- transit ---
                    tac_bus = r'&tactics_incity=0'
                    # intra-city strategy, optional, default 0: 0 recommended; 1 fewer transfers;
                    # 2 less walking; 3 no subway; 4 fastest; 5 subway first
                    city_bus = r'&tactics_intercity=0'
                    # inter-city strategy, optional, default 0: 0 fastest; 1 earliest departure; 2 cheapest
                    city_type = r'&trans_type_intercity=2'
                    # inter-city mode, optional, default 0: 0 train first; 1 plane first; 2 coach first
                    ori2 = r"&origin=" + ori
                    des2 = r"&destination=" + des
                    aurl_bus = url_bus + ori2 + des2 + tac_bus + city_bus + city_type + ak_bus2
                    res_bus = urlopen(aurl_bus)
                    cet_bus = res_bus.read()
                    res_bus.close()
                    result_bus = json.loads(cet_bus)
                    status = result_bus['status']
                    print('transit status code', status)
                    # --------------------------------------
                    # if status == 0:
                    #     rsls = result_bus['result']['routes']
                    #     if rsls == []:  # no transit plan: status is still 0 but routes is an empty list
                    #         diss_bus = 'status' + str(status) + ' no transit plan'
                    #     else:
                    #         m_bus = result_bus['result']['routes'][0]['distance']   # total transit distance (m)
                    #         time_bus = result_bus['result']['routes'][0]['duration'] # transit time (s)
                    #         cost_bus = result_bus['result']['routes'][0]['price']   # fare (yuan)
                    #         diss_bus = 'status' + str(status) + ' ' + str(m_bus) + ' ' + str(time_bus) + ' ' + str(cost_bus)
                    # elif status == 302 or status == 210 or status == 201:
                    #     akn2 = akn2 + 1
                    #     diss_bus = 'status' + str(status) + ' switch AK and resume'
                    # else:  # other status codes (server error)
                    #     diss_bus = 'status' + str(status) + ' server error'
                    # -----------------------------------------------

                    # aggregate and save
                    diss = mac_code + ' ' + str(ori) + ' ' + str(
                        des) + ' ' + diss_drive + ' ' + diss_walk  # + ' ' + diss_bus
                    with open(result_path, 'a', encoding='utf-8') as f:
                        f.write(diss)
                        f.write('\n')
                    n += 1
                    print('record ' + str(n) + ' done')
                except:
                    time.sleep(3)
                    diss_wrong = str(mac_code) + ' unknown error'
                    with open(result_path, 'a', encoding='utf-8') as f:
                        f.write(diss_wrong)
                        f.write('\n')
                    continue
            else:
                print('Quota exhausted!')
                break
    except:
        time.sleep(3)
        print('unknown error')
        with open(result_path, 'a', encoding='utf-8') as f:
            f.write('unknown error')
            f.write('\n')
        continue
    print('The program has stopped.')
    break  # break the while loop once the data is exhausted (hard to phrase as a for-loop condition)
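For comparison, a single driving query is more compact with the requests library, which URL-encodes the parameters itself. This is only a sketch using the same endpoint and status conventions as the script above; the coordinates in the usage line are made up, and the AK placeholder must be replaced with a real key:

import requests

def drive_route(origin, destination, ak):
    """Query the Baidu driving direction API; return (distance_m, duration_s) or None."""
    params = {
        'origin': origin,            # 'lat,lng'
        'destination': destination,  # 'lat,lng'
        'coord_type': 'bd09ll',
        'tactics': 11,               # 11 = default route
        'ak': ak,
    }
    r = requests.get('http://api.map.baidu.com/direction/v2/driving',
                     params=params, timeout=10)
    data = r.json()
    if data.get('status') != 0:      # non-zero status: quota, IP check, bad params, ...
        return None
    route = data['result']['routes'][0]
    return route['distance'], route['duration']

# usage (made-up coordinates):
# print(drive_route('34.74831,113.63095', '34.75804,113.67312', 'your-ak'))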

Finally, some plots drawn from the generated data:

[figures: plots of the scraped route-planning results]

Appendix:

1. Data acquisition (scraping all star-rated hotel listings for Zhengzhou from Ctrip, ordered by review score)
import csv
import json
import random
import re
import time

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

pd.set_option('display.max_columns', 10000)
pd.set_option('display.max_rows', 10000)
pd.set_option('display.max_colwidth', 10000)
pd.set_option('display.width', 1000)

# Ctrip hotel-list endpoint (Zhengzhou, cityId 559)
five_star_url = "http://hotels.ctrip.com/Domestic/Tool/AjaxHotelList.aspx"
filename = "data/star hotel list.csv"

def Scrap_hotel_lists():
    """Crawl the star-hotel list for Zhengzhou and save it to a csv file."""
    headers = {
        "Connection": "keep-alive",
        "origin": "http://hotels.ctrip.com",
        "Host": "hotels.ctrip.com",
        "referer": "https://hotels.ctrip.com/hotel/zhengzhou559",
        "user-agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36",
        "Content-Type": "application/x-www-form-urlencoded; charset=utf-8"
    }
    id = []
    name = []
    hotel_url = []
    address = []
    score = []

    for page in range(1, 8):  # 8 pages
        data = {
            "StartTime": "2019-09-08",  # depends on the dates you want to scrape
            "DepTime": "2019-09-18",
            "RoomGuestCount": "0,1,2",
            "cityId": 559,
            "cityPY": " zhengzhou",
            "cityCode": "",
            "cityLat": 34.758044,
            "cityLng": 113.673121,
            "page": page,
            "star": "",
            "orderby": 3
        }
        html = requests.post(five_star_url, headers=headers, data=data)
        # print(html.text)
        # drop an invalid "\洛阳" escape so json.loads succeeds
        j = json.loads(html.text.replace("\\洛阳", "洛阳"))
        # hotel_list = html.json()["totalMsg"]
        hotel_list = j["hotelPositionJSON"]
        for item in hotel_list:
            id.append(item['id'])
            name.append(item['name'])
            hotel_url.append(item['url'])
            address.append(item['address'])
            score.append(item['score'])
        time.sleep(random.randint(3, 5))

    hotel_array = np.array((id, name, score, hotel_url, address)).T
    list_header = ['id', 'name', 'score', 'url', 'address']
    array_header = np.array((list_header))
    hotellists = np.vstack((array_header, hotel_array))
    with open(filename, 'a', encoding="utf-8-sig", newline="") as f:
        csvwriter = csv.writer(f, dialect='excel')
        csvwriter.writerows(hotellists)

def hotel_detail(hotel_id):
    """Scrape the detailed room information of a specific hotel."""
    headers = {
        "Connection": "keep-alive",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "max-age=0",
        "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
        "Host": "hotels.ctrip.com",
        "If-Modified-Since": "Thu, 01 Jan 1970 00:00:00 GMT",
        "Referer": "http://hotels.ctrip.com/hotel/2231618.html",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/69.0.3497.92 Safari/537.36"
    }
    basic_url = "http://hotels.ctrip.com/Domestic/tool/AjaxHote1RoomListForDetai1.aspx?hotel="
    url = basic_url + str(hotel_id)
    r = requests.get(url, headers=headers)
    # The response is a json object whose 'html' field holds the room table.
    html = r.json()['html']
    soup = BeautifulSoup(html, "lxml")
    rooms = soup.findAll('td', attrs={"class": "child_name J_Col_RoomName"})

    RoomID = []
    RoomName = []
    LowPrice = []
    RoomSize = []
    RoomLevel = []
    IsAddBed = []
    BedSize = []
    CustomerNum = []

    baseroom_pattern = re.compile(r'<[^>]+>')  # regex that strips html tags
    for idx in range(len(rooms)):
        if rooms[idx].has_attr('data-baseroominfo'):
            room_info_str = rooms[idx]['data-baseroominfo']
            room_info_json = json.loads(room_info_str)
            RoomID.append(str(room_info_json["RoomID"]))
            RoomName.append(room_info_json["RoomName"])
            LowPrice.append(room_info_json["LowPrice"])
            baseroom_info = room_info_json["BaseRoomInfo"]  # a str of html
            remove_tag = baseroom_pattern.sub("", baseroom_info)
            RoomDetailInfo = remove_tag.split("|")
            if len(RoomDetailInfo) == 4:  # pad the optional "extra bed" field
                RoomDetailInfo.insert(3, None)
            RoomSize.append(RoomDetailInfo[0])
            RoomLevel.append(RoomDetailInfo[1])
            BedSize.append(RoomDetailInfo[2])
            IsAddBed.append(RoomDetailInfo[3])
            CustomerNum.append(RoomDetailInfo[4])
        else:
            continue

    RoomInfo = np.array((RoomID, RoomName, LowPrice, RoomSize, RoomLevel, BedSize, IsAddBed, CustomerNum)).T
    # Build a DataFrame from the collected columns
    column_name = ['RoomID', 'RoomName', 'LowPrice', 'RoomSize', 'RoomLevel', 'BedSize', 'IsAddBed', 'CustomerNum']
    df = pd.DataFrame(data=RoomInfo, columns=column_name)
    print(df)

if __name__ == "__main__":
    # 1. Scrape the star-hotel list for Zhengzhou
    Scrap_hotel_lists()
    # 2. Scrape the detailed hotel information
    df = pd.read_csv(filename, encoding='utf8')
    print("1. Zhengzhou Star Hotel Lists")
    print(df)
    hotelID = df["id"]
    print('\n')
    while True:
        print("2.1 To see the details of a hotel, input its index in the DataFrame.")
        print("2.2 To quit, input 'q'.")
        print("Please input the parameter: ")
        input_param = input()
        if input_param.isnumeric():
            hotel_index = int(input_param)
            if 0 <= hotel_index <= 170:
                print("3. The detail information of the hotel:")
                hotel_detail(hotelID[hotel_index])
            else:
                print('Hotel index out of range!')
                print('Remember: 0 <= hotel index <= 170')
                print('Please input again.')
                continue
        elif input_param == 'q':
            print('See you later!')
            break
        else:
            print('Invalid input!')
            print('\n')
            continue
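The room parser above leans on one trick worth isolating: BaseRoomInfo arrives as an HTML snippet, the r'<[^>]+>' pattern strips the tags, and the leftover text splits on '|'. A quick self-contained illustration, using a made-up snippet shaped like what Ctrip returns:

import re

baseroom_info = "<span>25平方米</span>|<span>2层</span>|<span>1.8米大床</span>|<span>可加床</span>|<span>2人</span>"
remove_tag = re.sub(r'<[^>]+>', '', baseroom_info)  # drop every html tag
print(remove_tag.split('|'))
# ['25平方米', '2层', '1.8米大床', '可加床', '2人']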
2. Drawing a word cloud of the hotel information from the generated data
from pyecharts import WordCloud
import random

name1 = hotel_list2  # list of hotel names scraped above
random_list = [296, 630, ...]  # weight list (elided); or generate one with the three lines below
# for i in range(len(name1)):
#     # generate len(name1) random integers in [300, 800]
#     random_list.append(random.randint(300, 800))
# print('generated random integer list:\n', random_list)
value = random_list
wordcloud = WordCloud(width=1300, height=800)
wordcloud.add("hotel info", name1, value, word_size_range=[10, 20], shape='pentagon')
wordcloud.show_config()
wordcloud.render()

[figure: the rendered hotel word cloud]
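One compatibility note: the snippet above targets the old pyecharts 0.5.x API (WordCloud imported from the package root, show_config(), separate name/value lists). Under pyecharts 1.x the import path and add() signature changed; a minimal sketch of the same cloud under 1.x, assuming (name, weight) pairs built from the scraped hotel list:

from pyecharts.charts import WordCloud

# hypothetical (hotel name, weight) pairs built from the scraped list
words = [("Hotel A", 630), ("Hotel B", 296)]

wc = WordCloud()
wc.add("hotel info", words, word_size_range=[10, 20], shape="pentagon")
wc.render("hotel_wordcloud.html")  # writes the chart to an html file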

Note: all of the above is purely for fun and learning.
