需求:给定近百万个小区名称及其对应的城市信息,通过高德获取每个小区的周边设施信息(交通、医疗、教育、生活设施)
高德JS API
爬取灵感来自高德开发支持的JS API。当然你也可以直接调用高德提供的api服务获取地址信息,但是api服务是有次数限制的;而JS API的调用并没有次数限制,经我验证,单个爬取节点把抓取频率控制在500毫秒一次是不会被限制的。
https://lbs.amap.com/api/javascript-api/example/poi-search/keywords-search
高德地址信息获取原理
通过JS引入高德的AMap对象,调用相关方法获取对应数据。JS调用高德地图显示周边信息也是这个逻辑。
准备工作
注册高德地图账号->控制台创建应用->申请Key
需要申请Key才可以在自己js代码中使用高德js
数据抓取
数据服务API:
读取小区数据放入list中,每来一次请求就从list中pop一条数据返回,这样前端可以多个节点、多个任务同时抓取。
数据获取:
要想使用高德,需引入如下2个js,其中的key即为在高德官网申请的key
<script src="http://cache.amap.com/lbs/static/jquery-1.9.1.js"></script>
<script src="http://webapi.amap.com/maps?v=1.3&key=************&plugin=AMap.ArrivalRange,AMap.Scale,AMap.Geocoder,AMap.Transfer,AMap.Autocomplete" id="amap_key"></script>
get请求获取小区数据,创建高德对象
// Ask the local data service for the next community record, then create the
// AMap map object and start the address lookup for that record.
$.ajax({
    url: "http://127.0.0.1:5075/api/community",
    type: "get",
    dataType: "json",
    contentType: "application/json",
    success: function (community) {
        console.log('Getapi data', community)
        // Keep the record in the page-level `params` variable.
        params = community;
        // The map is rendered into the element with id "container".
        MAP = new AMap.Map("container", { resizeEnable: true });
        getAddress(community)
    }
});
调用高德JS搜索小区信息:
1.在AMap.service方法中构造地点查询类
2.调用地点查询类的search方法返回结果result
3.使用返回结果中的小区经纬度调用searchNearBy去获取周边数据
// Excerpt: load the AMap.PlaceSearch plugin, search for the community by
// keyword, then query its surroundings.
// NOTE: `city`, `keyword`, `cpoint` and `MAP` are not defined inside this
// excerpt; they are expected to come from the enclosing code.
AMap.service(["AMap.PlaceSearch"], function() {
var placeSearch = new AMap.PlaceSearch({ // build the place-search object
pageSize: 3,
pageIndex: 1,
city: city, // city to search in
map: MAP,
type:"商务住宅|门牌信息",
citylimit:true,
extensions:"all",
// panel: "panel"
});
// keyword search
placeSearch.search(keyword, function(status, result) {
console.log('getAddress info',result.info,result);
// `cpoint` is presumably the [lng, lat] of the matched community taken from
// `result` — this excerpt does not show how it is derived.
searchNearBy(city,cpoint,keyword);
});
});
前端打印的搜索结果,获取的信息非常详细包括省市县及其编码,以及经纬度信息等:
完整代码
将代码保存至index.html,在同级目录下用python在本地启动http服务(Python 2: python -m SimpleHTTPServer 8000;Python 3: python -m http.server 8000)。
浏览器请求:http://localhost:8000?key=***************************
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="initial-scale=1.0, user-scalable=no, width=device-width">
<title>高德房源地址获取</title>
<script src="http://cache.amap.com/lbs/static/jquery-1.9.1.js"></script>
<!-- <script src="http://webapi.amap.com/maps?v=1.3&key=01aa9e655798548a06c7697db75a9903&plugin=AMap.ArrivalRange,AMap.Scale,AMap.Geocoder,AMap.Transfer,AMap.Autocomplete" id="amap_key"></script> -->
<script type="text/javascript">
// 根据请求url获取小区信息 及构造高德js
/**
 * Parse the query string of the current page URL into a plain object.
 *
 * Each "name=value" pair is split on its first "=" only, and the two halves
 * are percent-decoded separately. Decoding after splitting keeps an encoded
 * "&" or "=" inside a value intact, and every value is decoded exactly once.
 *
 * @returns {Object} Map of decoded parameter name to decoded string value.
 *     A parameter without "=" maps to "". The object is empty when the URL
 *     has no query string.
 */
function GetRequest() {
    // Decode one component; malformed escapes are returned unchanged instead
    // of throwing, so one bad parameter does not break the whole page.
    function decode(text) {
        try {
            return decodeURIComponent(text);
        } catch (e) {
            return text;
        }
    }

    var theRequest = {};
    var query = location.search; // the "?..." part of the URL, or ""
    if (query.indexOf("?") === -1) {
        return theRequest;
    }

    var pairs = query.substr(1).split("&");
    for (var i = 0; i < pairs.length; i++) {
        var pair = pairs[i];
        if (pair === "") {
            continue; // skip empty segments such as a trailing "&"
        }
        var eq = pair.indexOf("=");
        var name = eq === -1 ? pair : pair.substring(0, eq);
        var value = eq === -1 ? "" : pair.substring(eq + 1);
        theRequest[decode(name)] = decode(value);
    }
    return theRequest;
}
// Query parameters of the page URL; `key` is the AMap API key passed as ?key=...
var params = GetRequest();
var key = params.key;
if (!key) {
    console.error('Missing "key" query parameter');
}
// Build the AMap loader URL dynamically so the key is not hard-coded in the page.
// The key comes from the URL, so it is percent-encoded before being placed in
// markup, and the src attribute is quoted so the value cannot break out of it.
var mapSrc = 'http://webapi.amap.com/maps?v=1.3&key=' + encodeURIComponent(key) + '&plugin=AMap.ArrivalRange,AMap.Scale,AMap.Geocoder,AMap.Transfer,AMap.Autocomplete';
console.log(mapSrc)
// The closing tag is written in two pieces so the HTML parser does not end
// this inline script block early.
document.write('<script src="' + mapSrc + '"></sc' + 'ript>')
</script>
</head>
<body id='body'>
<div id="container"></div>
<script type="text/javascript">
/**
 * Upload the collected data for one community to the local storage service.
 *
 * Builds a document around `save_data`, POSTs it as JSON to
 * http://127.0.0.1:5125/upload, and sets the page-level `flag_over` flag to
 * true 400 ms later, whether or not the POST has finished.
 *
 * @param {Object} save_data Collected record; `save_data.community` is used
 *     in the document title and the whole object is stored under `_data.data`.
 */
function send_mongodb(save_data){
    // Reads `id` from the page-level `params` (the community record).
    var save_url = decodeURIComponent(location.href)+'&'+params.id;
    console.log('amap_data done:',save_data,save_url)
    // The payload lives in a local variable so the page-level `params` record
    // is not overwritten by the upload document.
    var payload = {
        "_template": "amap_gps_nearby",
        "_title": save_data.community+"周边信息",
        "_timestamp": Date.parse(new Date()),
        "_hostname": '',
        "_url": save_url,
        "_data": {
            "data":save_data
        },
        "_timestampstr": new Date()
    }
    $.ajax({
        type:"POST",
        url:"http://127.0.0.1:5125/upload",
        dataType:"json",
        contentType:"application/json",
        data:JSON.stringify(payload),
        success:function(data){
            console.log('POST mongodb:',data)
        },
        // Make upload failures visible instead of dropping them silently.
        error:function(xhr, textStatus, errorThrown){
            console.log('POST mongodb failed:',textStatus,errorThrown)
        }
    });
    setTimeout(function(){
        flag_over = true;
        // location.reload();
    }, 400);
}
/**
 * Look up one community in AMap and start the nearby-facility search for it.
 *
 * Searches `params.xiaoqu` within `params.city`, picks the best matching POI,
 * stores the base record in the page-level `save_data`, and calls
 * `searchNearBy` with the POI's [lng, lat]. When the request fails or nothing
 * usable is found, sets the page-level `flag_over` to true instead.
 *
 * @param {Object} params Community record with `city`, `district`, `xiaoqu`
 *     (community name) and `xiaoqu_code`.
 */
function getAddress(params) {
    var city = params.city;
    var district = params.district;
    var keyword = params.xiaoqu;
    var xiaoqu_code = params.xiaoqu_code;
    var xiaoqu = keyword;
    console.log('---------------------',city,keyword);
    AMap.service(["AMap.PlaceSearch"], function() {
        var placeSearch = new AMap.PlaceSearch({ // build the place-search object
            pageSize: 3,
            pageIndex: 1,
            city: city, // city to search in
            map: MAP,
            type:"商务住宅|门牌信息",
            citylimit:true,
            extensions:"all"
        });
        // keyword search
        placeSearch.search(keyword, function(status, result) {
            // AMap reports status as a string ('complete', 'error', 'no_data'),
            // so comparing it with 0 never detects a failed request.
            if (status === 'error') {
                console.log('请求失败', result)
                flag_over = true;
                return
            }
            console.log('getAddress info', result && result.info, result);
            var pois = (result && result.poiList && result.poiList.pois) || [];
            if (!keyword || pois.length === 0) {
                flag_over = true;
                console.log(keyword+' not find in amap');
                return
            }
            // Default to the first hit; prefer a hit whose district and name
            // both match the requested community.
            var detail = pois[0];
            for (var i = 0; i < pois.length; i++) {
                var item = pois[i];
                var sameDistrict = item.adname && district &&
                    (item.adname.indexOf(district) !== -1 || district.indexOf(item.adname) !== -1);
                var sameName = item.name &&
                    (xiaoqu.indexOf(item.name) !== -1 || item.name.indexOf(xiaoqu) !== -1);
                if (sameDistrict && sameName) {
                    detail = item;
                    break;
                }
            }
            if (!detail.location) {
                // Without coordinates there is no centre for the nearby search.
                flag_over = true;
                console.log(keyword+' not find in amap');
                return
            }
            var cpoint = [detail.location.lng, detail.location.lat];
            save_data = {"city":city,"district":district,"community":xiaoqu, "community_code":xiaoqu_code,"detail":detail}
            searchNearBy(city,cpoint,keyword);
        });
    });
}
/**
 * Collect nearby facilities for one community, one POI category at a time.
 *
 * For every entry of the category list (taken from the end of the list first)
 * it runs a nearby search with a radius of 5000 around `cpoint`, stores the
 * returned POIs under the category's key, and waits 400 ms before the next
 * query. When all categories are done it sets the page-level `flagSuccess`,
 * attaches the result to the page-level `save_data` as `nearby`, and calls
 * `send_mongodb`.
 *
 * @param {string} city City used to restrict the search.
 * @param {Array} cpoint Centre of the search as [lng, lat].
 * @param {string} keyword Community name; only logged here.
 */
function searchNearBy(city,cpoint,keyword){
    console.log('searchNearBy:',city,cpoint,keyword)
    var placeSearchNearBy = new AMap.PlaceSearch({ // build the place-search object
        pageSize: 5,
        type: '',
        pageIndex: 1,
        city: city, // city to search in
        citylimit:true,
        map: MAP
    });
    var nearby = {};
    // Each entry maps an output key to the AMap POI type string queried for it.
    var types = [
        {'subway':'地铁站'},{'bus':'公交车站'},{'parking_lot':'停车场'},{'other_traffic':'飞机场|火车站'},
        {'kids_school':'幼儿园'},{'primary_school':'小学'},{'middle_school':'初中'},{'high_school':'高等院校'},{'college':'大学'},{'train_school':'培训机构'},{'library':'图书馆'},{'science_museum':'科技馆'},{'AAA_hospital':'三级甲等医院'},{'special_hospital':'专科医院'},{'hospital':'综合医院'},{'clinic':'诊所'},{'pharmacy':'医药保健销售店'},{'shopping':'商场'},{'store':'便民商店|便利店'},{'supermarket':'超级市场'},{'comprehensive_market':'综合市场'},{'bank':'银行'},{'ATM':'ATM'},{'food':'中餐厅|外国餐厅|快餐厅'},{'drink':'咖啡厅|茶艺馆|冷饮店|甜品店'},{'park':'公园'},{'movies':'电影院'},{'sports':'运动场馆'},{'entertainment':'娱乐场所'},{'gym':'健身中心'}
    ]

    // Query the last remaining category, then schedule the next one.
    function Search(placeSearchNearBy, types){
        if(types.length === 0){
            flagSuccess = true;
            save_data['nearby'] = nearby;
            send_mongodb(save_data);
            return
        }
        var item = types.pop();
        // Both are local so concurrent pages or tasks cannot clobber them
        // through a shared global.
        var name;
        var type;
        for(var key in item){
            name = key;
            type = item[key];
        }
        placeSearchNearBy.setType(type);
        placeSearchNearBy.searchNearBy('', cpoint, 5000, function(status, result) {
            if(status === 'error'){
                // Keep going with an empty list, but leave a trace of the failure.
                console.log('searchNearBy failed:', name, result);
            }
            var pois = result && result.poiList ? result.poiList.pois : null;
            nearby[name] = pois && pois.length > 0 ? pois : [];
            // Throttle: wait 400 ms between two consecutive queries.
            setTimeout(function(){
                Search(placeSearchNearBy, types)
            }, 400);
        });
    }

    Search(placeSearchNearBy, types)
}
// Page-level record for the community currently being processed; filled in by
// getAddress and searchNearBy.
var save_data = {};
// Reload the page every 10 minutes so a stalled task does not block the node forever.
setInterval(function(){
    location.reload();
}, 1000*60*10);
// Fetch the next community from the local data service and start processing it.
$.ajax({
    type:"get",
    url:"http://127.0.0.1:5075/api/community",
    dataType:"json",
    contentType:"application/json",
    success:function(data){
        // Keep the record in the page-level `params`; send_mongodb reads `params.id`.
        params = data;
        console.log('Getapi data',data)
        // Wait 4 seconds before creating the map and starting the lookup.
        setTimeout(function(){
            if(typeof AMap === 'undefined'){
                // The AMap script did not load (for example a bad key or no network).
                console.log('AMap is not loaded, skip', data)
                return
            }
            MAP = new AMap.Map("container", {
                resizeEnable: true
            });
            getAddress(data)
        }, 400*10);
    },
    // Make a failed fetch visible; the periodic reload above retries later.
    error:function(xhr, textStatus, errorThrown){
        console.log('Getapi failed:',textStatus,errorThrown)
    }
});
</script>
</body>
</html>