需求:给定近百万小区名称及对应城市信息,通过高德获取其周边设施信息(交通、医疗、教育、生活设施)

高德JS API

爬取灵感来自高德开发支持的JS API。当然你也可以直接调用高德提供的api服务获取地址信息,但是api服务是有调用次数限制的,而JS API的调用并没有次数限制。经我验证,单个爬取节点把抓取间隔控制在500毫秒时不会被限制。
https://lbs.amap.com/api/javascript-api/example/poi-search/keywords-search

高德地址信息获取原理

通过JS引入高德的AMap对象,调用相关方法获取对应数据。JS调用高德地图显示周边信息也是这个逻辑。

准备工作

注册高德地图账号->控制台创建应用->申请Key
需要申请Key才可以在自己js代码中使用高德js

数据抓取

数据服务API:
读取小区数据放入list中,每来一次请求就从list中pop一条数据返回,这样前端可以多个节点、多个任务同时抓取。

数据获取:
要想使用高德 需引入如下2个js,其中的key即为高德官网申请的key

<script src="http://cache.amap.com/lbs/static/jquery-1.9.1.js"></script>
<script src="http://webapi.amap.com/maps?v=1.3&key=************&plugin=AMap.ArrivalRange,AMap.Scale,AMap.Geocoder,AMap.Transfer,AMap.Autocomplete" id="amap_key"></script>

get请求获取小区数据,创建高德对象

// Ask the local data service for one community record; once it arrives,
// create the AMap map object and start the address lookup for that record.
$.ajax({
    url: "http://127.0.0.1:5075/api/community",
    type: "get",
    contentType: "application/json",
    dataType: "json",
    success: function onCommunityLoaded(community) {
        // Keep the record in the shared `params` variable for later steps
        params = community;
        console.log('Getapi data', community)
        // The map is rendered into the element with id "container"
        MAP = new AMap.Map("container", { resizeEnable: true });
        getAddress(community)
    }
});

调用高德JS搜索小区信息:
1.在AMap.service方法中构造地点查询类
2.调用地点查询类的search方法返回结果result
3.使用返回结果中的小区经纬度调用searchNearBy去获取周边数据

// Excerpt: load the AMap.PlaceSearch plugin, search for the community by keyword,
// then hand over to searchNearBy for the surrounding facilities.
// NOTE(review): city, keyword, MAP and cpoint are not defined in this excerpt;
// they come from surrounding code that the excerpt omits.
AMap.service(["AMap.PlaceSearch"], function() {
            var placeSearch = new AMap.PlaceSearch({ // build the place-search object
                pageSize: 3,
                pageIndex: 1,
                city: city, // city to search in
                map: MAP,
                type:"商务住宅|门牌信息",
                citylimit:true,
                extensions:"all",
                // panel: "panel"
            });
            // keyword search

            placeSearch.search(keyword, function(status, result) {
                // Log the raw search result for inspection
                console.log('getAddress info',result.info,result);

                searchNearBy(city,cpoint,keyword);
            });
        });

前端打印的搜索结果,获取的信息非常详细包括省市县及其编码,以及经纬度信息等:

完整代码

将代码保存至index.html,在同级目录下用python在本地启动http服务(Python 2:python -m SimpleHTTPServer 8000;Python 3:python -m http.server 8000)。
浏览器请求:http://localhost:8000?key=***************************

<!doctype html>
<html>

<head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="initial-scale=1.0, user-scalable=no, width=device-width">
    <title>高德房源地址获取</title>
    <script src="http://cache.amap.com/lbs/static/jquery-1.9.1.js"></script>
    <!-- <script src="http://webapi.amap.com/maps?v=1.3&key=01aa9e655798548a06c7697db75a9903&plugin=AMap.ArrivalRange,AMap.Scale,AMap.Geocoder,AMap.Transfer,AMap.Autocomplete" id="amap_key"></script> -->
    <script type="text/javascript">
        // 根据请求url获取小区信息 及构造高德js
        /**
         * Parse the page's query string (location.search) into a plain object.
         *
         * Each "name=value" pair is split first and only then percent-decoded,
         * so an encoded "&" or "=" inside a value survives intact.
         *
         * @returns {Object} Map of parameter name to decoded string value. A
         *     parameter written without "=" maps to an empty string. The object
         *     is empty when the URL has no query string.
         */
        function GetRequest() {
            // Decode one component; keep the raw text if it holds a malformed % escape
            function decode(text) {
                try {
                    return decodeURIComponent(text);
                } catch (e) {
                    return text;
                }
            }

            var theRequest = {};
            var query = location.search; // starts with "?" when a query string is present
            if (query.indexOf("?") === -1) {
                return theRequest;
            }
            var pairs = query.substr(1).split("&");
            for (var i = 0; i < pairs.length; i++) {
                var pair = pairs[i];
                if (pair === "") {
                    continue; // empty segment, e.g. a bare "?" or a trailing "&"
                }
                // Split on the first "=" only, so "=" inside the value is preserved
                var eq = pair.indexOf("=");
                var name = eq === -1 ? pair : pair.slice(0, eq);
                var value = eq === -1 ? "" : pair.slice(eq + 1);
                theRequest[decode(name)] = decode(value);
            }
            return theRequest;
        }

        // Query-string parameters of this page; `key` carries the AMap JS API key.
        var params = GetRequest();
        var key = params.key;
        // Build the AMap loader URL dynamically so the key comes from the page URL.
        // The key is URL-encoded because it is taken straight from the address bar
        // and must not be able to break out of the query string or the attribute below.
        var mapSrc = 'http://webapi.amap.com/maps?v=1.3&key='+encodeURIComponent(key)+'&plugin=AMap.ArrivalRange,AMap.Scale,AMap.Geocoder,AMap.Transfer,AMap.Autocomplete';
        console.log(mapSrc)
        // The src attribute is quoted, and the closing tag is written in two pieces
        // so the HTML parser does not treat it as the end of this inline script.
        document.write('<script src="'+mapSrc+'"></sc' + 'ript>')
    </script>

</head>

<body id='body'>
<div id="container"></div>

<script type="text/javascript">
    /**
     * Upload one finished community record to the local upload service
     * (POST http://127.0.0.1:5125/upload) and, 400 ms later, set the global
     * `flag_over` to true.
     * NOTE(review): the function name suggests the service stores the record in
     * MongoDB, and `flag_over` looks like a completion signal for an external
     * driver; neither is visible in this file, so confirm before relying on it.
     *
     * @param {Object} save_data Record to upload; `save_data.community` is used
     *     for the title.
     */
    function send_mongodb(save_data){
        // `params` is the shared community record; its id is appended to the page URL
        var save_url = decodeURIComponent(location.href)+'&'+params.id;
        console.log('amap_data done:',save_data,save_url)
        // One Date instance so both timestamp fields describe the same instant
        var now = new Date();
        // Local payload object: the shared `params` record must not be overwritten
        var payload = {
          "_template": "amap_gps_nearby",
          "_title": save_data.community+"周边信息",
          "_timestamp": Date.parse(now),
          "_hostname": '',
          "_url": save_url,
          "_data": {
            "data":save_data
            },
          "_timestampstr": now
        };
        $.ajax({
            type:"POST",
            url:"http://127.0.0.1:5125/upload",
            dataType:"json",
            contentType:"application/json",
            data:JSON.stringify(payload),
            success:function(data){
                console.log('POST mongodb:',data)
            },
            error:function(xhr, textStatus, errorThrown){
                // Surface upload failures instead of dropping them silently
                console.log('POST mongodb failed:',textStatus,errorThrown)
            }
         });
        setTimeout(function(){
            // Explicit global (same effect as the former implicit assignment)
            window.flag_over = true;
            // location.reload();
        }, 400);

    }

    /**
     * Look up one community with AMap.PlaceSearch and, when found, store the
     * chosen POI in the global `save_data` and start the nearby-facility search.
     *
     * The first returned POI is used by default; a POI whose district (adname)
     * and name both overlap with the requested district and community name is
     * preferred. When nothing usable is found, or the request fails, the global
     * `flag_over` is set to true and no nearby search is started.
     *
     * @param {Object} params Community record with `city`, `district`, `xiaoqu`
     *     (community name, used as the search keyword) and `xiaoqu_code`.
     */
    function getAddress(params) {
        var city = params.city;
        var district = params.district;
        var keyword = params.xiaoqu;
        var xiaoqu_code = params.xiaoqu_code;
        var xiaoqu = keyword;
        console.log('---------------------',city,keyword);

        // Mark the task as finished without a result
        function notFound() {
            window.flag_over = true;
            console.log(keyword+' not find in amap');
            // location.reload();
        }

        // True when either string contains the other
        function overlaps(a, b) {
            return a.indexOf(b)!=-1 || b.indexOf(a)!=-1;
        }

        // Nothing to search for: skip the request entirely
        if(!keyword){
            notFound();
            return
        }

        AMap.service(["AMap.PlaceSearch"], function() {
            var placeSearch = new AMap.PlaceSearch({ // build the place-search object
                pageSize: 3,
                pageIndex: 1,
                city: city, // city to search in
                map: MAP,
                type:"商务住宅|门牌信息",
                citylimit:true,
                extensions:"all"
                // panel: "panel"
            });

            // keyword search
            placeSearch.search(keyword, function(status, result) {
                // AMap reports the outcome as a string status, so compare against 'error'
                if(status==='error'){
                    console.log('请求失败')
                    window.flag_over = true;
                    return
                }
                console.log('getAddress info',result ? result.info : undefined,result);
                var pois = result && result.poiList ? result.poiList.pois : null;
                if(!pois || pois.length===0){
                    notFound();
                    return
                }

                // Default to the top hit, then refine by district and name
                var detail = pois[0];
                for(var i=0; i<pois.length; i++){
                    var item = pois[i];
                    if(item.adname && district && item.name &&
                        overlaps(item.adname, district) && overlaps(xiaoqu, item.name)){
                        detail = item;
                        break;
                    }
                }
                // Without coordinates there is no centre for the nearby search
                if(!detail["location"]){
                    notFound();
                    return
                }
                var cpoint = [detail["location"]["lng"],detail["location"]["lat"]];
                // Shared global record; the nearby search adds to it before upload
                save_data = {"city":city,"district":district,"community":xiaoqu, "community_code":xiaoqu_code,"detail":detail}
                searchNearBy(city,cpoint,keyword);
            });
        });
    }

    /**
     * Collect the facilities around a point, one POI category at a time, then
     * attach them to the global `save_data` as `save_data.nearby` and upload the
     * record with send_mongodb.
     *
     * Categories are requested sequentially, last table entry first, with a
     * pause between requests. A category with no result is stored as an empty
     * array.
     *
     * @param {string} city City used to restrict the search.
     * @param {Array} cpoint Search centre as [lng, lat].
     * @param {string} keyword Community name; only logged here.
     */
    function searchNearBy(city,cpoint,keyword){
        console.log('searchNearBy:',city,cpoint,keyword)
        var RADIUS = 5000;        // search radius passed to AMap (metres per the AMap docs)
        var REQUEST_GAP_MS = 400; // pause between two category requests
        var placeSearchNearBy = new AMap.PlaceSearch({ // build the place-search object
                pageSize: 5,
                type: '',
                pageIndex: 1,
                city: city, // city to search in
                citylimit:true,
                map: MAP
            });
        var nearby = {};
        // [result key, AMap POI type] pairs.
        // NOTE(review): 'high_school' maps to '高等院校' and 'middle_school' to '初中';
        // confirm these against AMap's POI category names.
        var types = [
            ['subway','地铁站'],['bus','公交车站'],['parking_lot','停车场'],['other_traffic','飞机场|火车站'],
            ['kids_school','幼儿园'],['primary_school','小学'],['middle_school','初中'],['high_school','高等院校'],
            ['college','大学'],['train_school','培训机构'],['library','图书馆'],['science_museum','科技馆'],
            ['AAA_hospital','三级甲等医院'],['special_hospital','专科医院'],['hospital','综合医院'],['clinic','诊所'],
            ['pharmacy','医药保健销售店'],['shopping','商场'],['store','便民商店|便利店'],['supermarket','超级市场'],
            ['comprehensive_market','综合市场'],['bank','银行'],['ATM','ATM'],['food','中餐厅|外国餐厅|快餐厅'],
            ['drink','咖啡厅|茶艺馆|冷饮店|甜品店'],['park','公园'],['movies','电影院'],['sports','运动场馆'],
            ['entertainment','娱乐场所'],['gym','健身中心']
            ]

        Search(placeSearchNearBy, types)

        // Process the remaining categories one by one; upload when none are left
        function Search(placeSearchNearBy, types){
            if(types.length==0){
                window.flagSuccess = true;
                save_data['nearby'] = nearby;
                send_mongodb(save_data);
                return
            }
            var entry = types.pop();
            var name = entry[0];
            var type = entry[1];
            placeSearchNearBy.setType(type);
            placeSearchNearBy.searchNearBy('', cpoint, RADIUS, function(status, result) {
                // A failed or empty request is recorded as an empty list (best effort)
                var found = result && result.poiList ? result.poiList.pois : null;
                nearby[name] = (found && found.length>0) ? found : [];
                setTimeout(function(){
                    Search(placeSearchNearBy, types)
                }, REQUEST_GAP_MS);

            });
        }
    }

    // Shared record for the community being processed; getAddress replaces it
    // and searchNearBy adds the nearby facilities before it is uploaded.
    var save_data = {};

    // Reload the page every 10 minutes.
    // NOTE(review): each page load fetches exactly one community, so the reload
    // presumably moves on to the next one; confirm the intended pacing.
    setInterval(function(){
        location.reload();
    }, 1000*60*10);

    // Fetch one community record from the local data service, then start the lookup
    $.ajax({
            type:"get",
            url:"http://127.0.0.1:5075/api/community",
            dataType:"json",
            contentType:"application/json",
            success:function(data){
                // Shared record; send_mongodb reads params.id from it
                params = data;
                console.log('Getapi data',data)

                // Wait 4 s before touching AMap (presumably to let the dynamically written script load)
                setTimeout(function(){
                    if(typeof AMap === 'undefined'){
                        // Report a missing library instead of failing with a ReferenceError
                        console.log('AMap JS API is not loaded, check the key parameter in the page URL')
                        return
                    }
                    // Explicit global: getAddress and searchNearBy read MAP
                    window.MAP = new AMap.Map("container", {
                        resizeEnable: true
                    });
                    getAddress(data)

                }, 400*10);
            },
            error:function(xhr, textStatus, errorThrown){
                // Surface data-service failures instead of dropping them silently
                console.log('Getapi failed:',textStatus,errorThrown)
            }
         });

</script>
</body>
</html>
01-25 21:41