1、该程序可根据需求获取所需的省、市、县、乡镇(街道)、村(居)委会数据,行政区划代码为12位;

2、获取的年份可以更换;

3、可以根据需求写入数据库,方便使用,每年可更新维护一次;

4、国家统计局网站有访问限制,不宜一次性获取全国全部数据,建议按省份划分,分段获取;

实现代码如下:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.CookieHandler;
import java.net.CookieManager;
import java.net.CookiePolicy;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Locale;

/**
 * Fetches administrative division data from the National Bureau of Statistics.
 * NBOS = National Bureau of Statistics.
 */

    /**
     * Entry point: reads the province list from the index page and descends
     * into the selected province.
     *
     * <p>The code of every province found on the index page is printed, but
     * only the province whose name equals "北京市" is walked further; all
     * other provinces are skipped.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if a page cannot be fetched or a matching anchor does
     *                   not have the expected shape
     */
    public static void main(String[] args) throws Exception {
        // To go through a proxy, call initProxy("xx.xx.xx.xx", "xx") before fetching.
        final String indexUrl = baseUrl + "index.html";
        final String indexPage = getContent(indexUrl).toUpperCase();

        for (String anchor : indexPage.split("<A")) {
            // Only anchors that carry an HREF to an .HTML page are treated as provinces.
            if (!anchor.contains("HREF") || !anchor.contains(".HTML")) {
                continue;
            }

            // Offset 7 presumably skips a leading " HREF='" in the fragment;
            // the link target then runs up to the closing "'>".
            final int hrefEnd = anchor.indexOf("'>");
            final String provinceUrl = anchor.substring(7, hrefEnd);

            // The first two characters of the link are the province prefix;
            // pad with ten zeros to obtain the 12-digit code.
            final String provinceCode = provinceUrl.substring(0, 2) + "0000000000";
            System.out.println("省级CODE:" + provinceCode);

            final String name = anchor.substring(hrefEnd + 2, anchor.indexOf("<BR/>"));
            if ("北京市".equals(name)) {
                System.out.println("获取省份:" + name);
                readShi(provinceUrl, name);
            }
        }
    }


    /**
     * Reads the city-level rows of a province page and descends into each city.
     *
     * <p>Each row is expected to hold two links, for example
     * {@code <TD><A HREF='11/1101.HTML'>110100000000</A></TD><TD><A HREF='11/1101.HTML'>市辖区</A></TD>}:
     * the first link text is the 12-digit code, the second is the name.
     *
     * @param url  province page, relative to {@code baseUrl}
     * @param name province name, prepended to every printed city name
     * @throws Exception if the page cannot be fetched or a row lacks the two
     *                   expected links
     */
    public static void readShi(String url, String name) throws Exception {
        // Locale.ROOT keeps the "CITYTR" marker matchable under default locales
        // whose case mapping of 'i' differs (e.g. Turkish maps i -> İ).
        String content = getContent(baseUrl + url).toUpperCase(Locale.ROOT);
        String[] rows = content.split("CITYTR");

        // rows[0] is the markup before the first city row, so start at 1.
        for (int c = 1; c < rows.length; c++) {
            String[] links = rows[c].split("<A HREF='");

            // First link: target URL followed by the code as link text.
            String codeCell = links[1];
            int codeHrefEnd = codeCell.indexOf("'>");
            String cityUrl = codeCell.substring(0, codeHrefEnd);
            String cityCode = codeCell.substring(codeHrefEnd + 2, codeCell.indexOf("</A>"));
            System.out.println("cityCode:" + cityCode);

            // Second link: same target, with the city name as link text.
            String nameCell = links[2];
            String cityName = name
                    + nameCell.substring(nameCell.indexOf("'>") + 2, nameCell.indexOf("</A>"));
            System.out.println("获取所属市:" + cityName);

            // County links on the city page are relative to the city's directory ("11/").
            readXian(cityUrl.substring(0, cityUrl.indexOf("/") + 1), cityUrl, cityName);
        }
    }

    /**
     * Reads the county-level rows of a city page and descends into every
     * county that links to a town page.
     *
     * <p>Rows come in two shapes: with links (code and name are link texts)
     * or without links (code and name are plain cells). Rows without a link
     * are printed but not descended into.
     *
     * @param prix     directory prefix that is passed on to {@code readZhen}
     *                 together with each county link
     * @param url      city page, relative to {@code baseUrl}
     * @param cityName accumulated name of the parent divisions, prepended to
     *                 every printed county name
     * @throws Exception if the page cannot be fetched or a row does not have
     *                   one of the two expected shapes
     */
    public static void readXian(String prix, String url, String cityName) throws Exception {
        // Locale.ROOT keeps the "COUNTYTR" marker matchable regardless of the
        // JVM's default locale.
        String content = getContent(baseUrl + url).toUpperCase(Locale.ROOT);
        String[] rows = content.split("COUNTYTR");

        // rows[0] is the markup before the first county row, so start at 1.
        for (int i = 1; i < rows.length; i++) {
            String row = rows[i];
            String countyUrl = null;
            String areaName;

            if (!row.contains("<A HREF='")) {
                // Plain row: characters 6..17 are taken as the 12-digit code
                // (presumably after a leading "'><TD>").
                String areaCode = row.substring(6, 18);
                System.out.println("AreaCode:" + areaCode);

                // Take the first "</TD>" after the name. Using the last one in
                // the fragment would swallow trailing page markup whenever this
                // is the final row on the page.
                int nameStart = row.indexOf("</TD><TD>") + 9;
                areaName = cityName + row.substring(nameStart, row.indexOf("</TD>", nameStart));
                System.out.println("获取所属市辖区:" + areaName);
            } else {
                String[] links = row.split("<A HREF='");

                // First link: target URL followed by the code as link text.
                String codeCell = links[1];
                int codeHrefEnd = codeCell.indexOf("'>");
                countyUrl = codeCell.substring(0, codeHrefEnd);
                String areaCode = codeCell.substring(codeHrefEnd + 2, codeCell.indexOf("</A>"));
                System.out.println("AreaCode:" + areaCode);

                // Second link: the county name as link text.
                String nameCell = links[2];
                areaName = cityName
                        + nameCell.substring(nameCell.indexOf("'>") + 2, nameCell.indexOf("</A>"));
                System.out.println("获取所属市区(或者县级市):" + areaName);
            }

            // Only rows that carried a link have a town page to read.
            if (countyUrl != null) {
                readZhen(prix, countyUrl, areaName);
            }
        }
    }

    /**
     * Reads the town-level rows of a county page and descends into each
     * town's village page.
     *
     * @param prix     directory prefix that {@code url} is appended to
     * @param url      county page, appended to {@code baseUrl + prix}
     * @param areaName accumulated name of the parent divisions, prepended to
     *                 every printed town name
     * @throws Exception if the page cannot be fetched or a row lacks the two
     *                   expected links
     */
    public static void readZhen(String prix, String url, String areaName) throws Exception {
        final String page = getContent(baseUrl + prix + url).toUpperCase();

        // Links found on this page are passed on together with the directory
        // part of this page's own path.
        final String pagePath = prix + url;
        final String childPrefix = pagePath.substring(0, pagePath.lastIndexOf("/") + 1);

        final String[] rows = page.split("TOWNTR");
        // rows[0] is the markup before the first town row, so start at 1.
        for (int row = 1; row < rows.length; row++) {
            final String[] links = rows[row].split("<A HREF='");

            // First link: target URL followed by the code as link text.
            final String codeCell = links[1];
            final int codeHrefEnd = codeCell.indexOf("'>");
            final String townUrl = codeCell.substring(0, codeHrefEnd);
            final String townCode = codeCell.substring(codeHrefEnd + 2, codeCell.indexOf("</A>"));
            System.out.println(townCode);

            // Second link: the town name as link text.
            final String nameCell = links[2];
            final int nameHrefEnd = nameCell.indexOf("'>");
            final String townName =
                    areaName + nameCell.substring(nameHrefEnd + 2, nameCell.indexOf("</A>"));
            System.out.println("获取所属乡镇(街道办事处):" + townName);

            readCun(childPrefix, townUrl, townName);
        }
    }

    /**
     * Reads the village-level rows of a town page and prints them.
     *
     * <p>For each row the first three {@code <TD>} cells are read in order:
     * village code, urban-rural classification code, and village name.
     *
     * @param prix       directory prefix that {@code url} is appended to
     * @param url        town page, appended to {@code baseUrl + prix}
     * @param towntrName accumulated name of the parent divisions, prepended to
     *                   every printed village name
     * @throws Exception if the page cannot be fetched or a row has fewer than
     *                   three cells
     */
    public static void readCun(String prix, String url, String towntrName) throws Exception {
        // Locale.ROOT keeps the "VILLAGETR" marker matchable under default
        // locales whose case mapping of 'i' differs (e.g. Turkish maps i -> İ).
        String content = getContent(baseUrl + prix + url).toUpperCase(Locale.ROOT);
        String[] rows = content.split("VILLAGETR");

        // rows[0] is the markup before the first village row, so start at 1.
        for (int i = 1; i < rows.length; i++) {
            String[] cells = rows[i].split("<TD>");

            // Village code
            String villageCode = cells[1].substring(0, cells[1].indexOf("</TD>"));
            // Urban-rural classification code
            String cxfldm = cells[2].substring(0, cells[2].indexOf("</TD>"));
            // Village name, prefixed with the names of all parent divisions
            String cunName = towntrName + cells[3].substring(0, cells[3].indexOf("</TD>"));

            System.out.println("villageCode:" + villageCode + "***cxfldm:" + cxfldm);
            System.out.println("所属村数据:" + cunName);
        }
    }

    /**
     * Configures an HTTP proxy by setting JVM system properties.
     *
     * <p>NOTE(review): {@code http.proxyType} and {@code http.proxySet} do not
     * appear to be among the documented JDK networking properties; confirm
     * whether they are still needed.
     *
     * @param host proxy host name or address
     * @param port proxy port, as a string
     */
    public static void initProxy(String host, String port) {
        // Property name / value pairs, applied in this order.
        final String[][] settings = {
            {"http.proxyType", "4"},
            {"http.proxyPort", port},
            {"http.proxyHost", host},
            {"http.proxySet", "true"},
        };
        for (String[] setting : settings) {
            System.setProperty(setting[0], setting[1]);
        }
    }

    /**
     * Downloads a page and returns its text with all lines concatenated
     * (line terminators are dropped).
     *
     * @param strUrl absolute URL of the page
     * @return the page content decoded with {@code CHARSET}, without line breaks
     * @throws Exception if the URL is malformed or the page cannot be read;
     *                   the failing URL is printed before the exception is rethrown
     */
    public static String getContent(String strUrl) throws Exception {
        try {
            // Accept all cookies: without a cookie store the server keeps
            // redirecting the request back to the same address until the JDK
            // gives up with
            // "java.net.ProtocolException: Server redirected too many times (20)".
            CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
            URL url = new URL(strUrl);

            // The previous version also created an HttpURLConnection and disabled
            // redirects on it, but then read from a separate url.openStream()
            // connection, so that setting never applied. The unused connection is
            // dropped here; the request is still made through openStream().
            StringBuilder sb = new StringBuilder();
            // try-with-resources closes the reader (and the underlying stream)
            // even when reading fails part-way through.
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(url.openStream(), Charset.forName(CHARSET)))) {
                String line;
                while ((line = br.readLine()) != null) {
                    sb.append(line);
                }
            }
            return sb.toString();
        } catch (Exception e) {
            System.out.println("can't open url:" + strUrl);
            throw e;
        }
    }



}

11-01 15:13