1、该程序可按需获取省、市、县、乡镇(街道)、村(居)委会各级行政区划,区划代码为12位;
2、获取的年份可以更换;
3、可以根据需求写入数据库,方便使用,每年可更新维护一次;
4、国家统计局网站有访问频率限制,不宜一次性获取全国全部数据,建议按省份划分、分段获取;
实现代码如下:
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.CookieHandler;
import java.net.CookieManager;
import java.net.CookiePolicy;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Locale;
/**
 * Fetches administrative division codes from the website of the
 * National Bureau of Statistics of China (NBS).
 */
/**
 * Entry point: reads the province index page and crawls everything below one province.
 *
 * <p>The index page is upper-cased and split on {@code <A}. A fragment is treated as a
 * province link when it contains both {@code HREF} and {@code .HTML}; the link target is
 * the text between the 7-character prefix {@code " HREF='"} and {@code '>}, and the
 * province name is the text between {@code '>} and {@code <BR/>}. The 12-digit province
 * code is the first two characters of the link target followed by ten zeros.
 *
 * <p>Fragments that do not carry both delimiters (for example unrelated links on the
 * page) are skipped instead of aborting the run with an index error.
 *
 * @param args optional; {@code args[0]} is the exact province name to crawl. When absent,
 *             {@code "北京市"} is used, which matches the previous hard-coded behavior.
 * @throws Exception if a page cannot be downloaded
 */
public static void main(String[] args) throws Exception {
    // One province per run keeps the request volume low.
    String targetProvince = (args != null && args.length > 0) ? args[0] : "北京市";
    String url = baseUrl + "index.html";
    // To route requests through a proxy, call initProxy("xx.xx.xx.xx", "xx") here.
    // Locale.ROOT keeps the markers below matchable regardless of the JVM default locale.
    String page = getContent(url).toUpperCase(Locale.ROOT);
    // Length of the " HREF='" prefix that follows every "<A" split point.
    final int hrefStart = 7;
    for (String anchor : page.split("<A")) {
        if (!anchor.contains("HREF") || !anchor.contains(".HTML")) {
            continue;
        }
        int hrefEnd = anchor.indexOf("'>");
        int nameEnd = anchor.indexOf("<BR/>");
        // Need at least two characters of link target and a name delimiter after it.
        if (hrefEnd < hrefStart + 2 || nameEnd < hrefEnd + 2) {
            continue;
        }
        String link = anchor.substring(hrefStart, hrefEnd);
        String provinceCode = link.substring(0, 2) + "0000000000";
        System.out.println("省级CODE:" + provinceCode);
        String name = anchor.substring(hrefEnd + 2, nameEnd);
        if (!name.equals(targetProvince)) {
            continue;
        }
        System.out.println("获取省份:" + name);
        readShi(link, name);
    }
}
/**
 * Reads the city-level rows of one province page and descends into each city.
 *
 * <p>The page is upper-cased and split on {@code CITYTR}; the first chunk (everything
 * before the first row) is ignored. A row is expected to contain two links, e.g.
 * {@code '><TD><A HREF='11/1101.HTML'>110100000000</A></TD><TD><A HREF='11/1101.HTML'>市辖区</A></TD>}:
 * the first link yields the target URL and the 12-digit code, the second the name.
 * Rows that do not match this shape are reported on stderr and skipped.
 *
 * <p>NOTE(review): because the whole page is upper-cased, the extracted link targets are
 * upper-cased as well (as in the sample above) — confirm the server resolves them.
 *
 * @param url  province page path relative to {@code baseUrl}
 * @param name province name, used as prefix for the printed city names
 * @throws Exception if a page cannot be downloaded
 */
public static void readShi(String url, String name) throws Exception {
    // Locale.ROOT: with a Turkish/Azeri default locale "citytr" would become "CİTYTR"
    // and the split below would never match.
    String content = getContent(baseUrl + url).toUpperCase(Locale.ROOT);
    String[] rows = content.split("CITYTR");
    for (int c = 1; c < rows.length; c++) {
        String[] links = rows[c].split("<A HREF='");
        if (links.length < 3) {
            System.err.println("skipping city row " + c + " without two links in " + url);
            continue;
        }
        int urlEnd = links[1].indexOf("'>");
        int codeEnd = links[1].indexOf("</A>");
        int nameStart = links[2].indexOf("'>");
        int nameEnd = links[2].indexOf("</A>");
        if (urlEnd < 0 || codeEnd < urlEnd + 2 || nameStart < 0 || nameEnd < nameStart + 2) {
            System.err.println("skipping unparsable city row " + c + " in " + url);
            continue;
        }
        // First link: target URL and code.
        String cityUrl = links[1].substring(0, urlEnd);
        String cityCode = links[1].substring(urlEnd + 2, codeEnd);
        System.out.println("cityCode:" + cityCode);
        // Second link: display name.
        String cityName = name + links[2].substring(nameStart + 2, nameEnd);
        System.out.println("获取所属市:" + cityName);
        // The directory part of the city URL (up to and including '/') prefixes deeper links.
        readXian(cityUrl.substring(0, cityUrl.indexOf("/") + 1), cityUrl, cityName);
    }
}
/**
 * Reads the county-level rows of one city page and descends into each linked county.
 *
 * <p>The page is upper-cased and split on {@code COUNTYTR}; the first chunk is ignored.
 * Two row shapes are handled:
 * <ul>
 *   <li>rows without a link: the code is taken from characters 6..18 of the chunk (right
 *       after the 6-character {@code '><TD>} prefix) and the name from the second cell;
 *       nothing is crawled below such a row;</li>
 *   <li>rows with two links: the first link yields URL and code, the second the name, and
 *       {@link #readZhen} is called for the URL.</li>
 * </ul>
 * Rows matching neither shape are reported on stderr and skipped.
 *
 * <p>NOTE(review): a city page that lists town rows directly (no {@code COUNTYTR} rows)
 * produces no output here — confirm whether such pages exist for the crawled year.
 *
 * @param prix     directory prefix passed through to {@link #readZhen}
 * @param url      city page path relative to {@code baseUrl}
 * @param cityName accumulated province+city name, used as prefix for printed names
 * @throws Exception if a page cannot be downloaded
 */
public static void readXian(String prix, String url, String cityName) throws Exception {
    String content = getContent(baseUrl + url).toUpperCase(Locale.ROOT);
    String[] rows = content.split("COUNTYTR");
    for (int i = 1; i < rows.length; i++) {
        String row = rows[i];
        if (!row.contains("<A HREF='")) {
            int cellBreak = row.indexOf("</TD><TD>");
            // First "</TD>" after the name start. The last chunk runs to the end of the
            // page, so lastIndexOf could pick up a closing tag far beyond this row.
            int nameEnd = cellBreak < 0 ? -1 : row.indexOf("</TD>", cellBreak + 9);
            if (row.length() < 18 || nameEnd < 0) {
                System.err.println("skipping unparsable county row " + i + " in " + url);
                continue;
            }
            String areaCode = row.substring(6, 18);
            System.out.println("AreaCode:" + areaCode);
            String areaName = cityName + row.substring(cellBreak + 9, nameEnd);
            System.out.println("获取所属市辖区:" + areaName);
            // No link, hence nothing to descend into.
            continue;
        }

        String[] links = row.split("<A HREF='");
        if (links.length < 3) {
            System.err.println("skipping county row " + i + " without two links in " + url);
            continue;
        }
        int urlEnd = links[1].indexOf("'>");
        int codeEnd = links[1].indexOf("</A>");
        int nameStart = links[2].indexOf("'>");
        int nameEnd = links[2].indexOf("</A>");
        if (urlEnd < 0 || codeEnd < urlEnd + 2 || nameStart < 0 || nameEnd < nameStart + 2) {
            System.err.println("skipping unparsable county row " + i + " in " + url);
            continue;
        }
        // First link: target URL and code.
        String countyUrl = links[1].substring(0, urlEnd);
        String areaCode = links[1].substring(urlEnd + 2, codeEnd);
        System.out.println("AreaCode:" + areaCode);
        // Second link: display name.
        String areaName = cityName + links[2].substring(nameStart + 2, nameEnd);
        System.out.println("获取所属市区(或者县级市):" + areaName);
        readZhen(prix, countyUrl, areaName);
    }
}
/**
 * Reads the town-level rows of one county page and descends into each town.
 *
 * <p>The page at {@code baseUrl + prix + url} is upper-cased and split on {@code TOWNTR};
 * the first chunk is ignored. Each row is expected to contain two links: the first yields
 * the town page URL and the code, the second the name. Rows that do not match are
 * reported on stderr and skipped.
 *
 * @param prix     directory prefix of the county page
 * @param url      county page path relative to {@code prix}
 * @param areaName accumulated name down to county level, used as prefix for printed names
 * @throws Exception if a page cannot be downloaded
 */
public static void readZhen(String prix, String url, String areaName) throws Exception {
    String pagePath = prix + url;
    String content = getContent(baseUrl + pagePath).toUpperCase(Locale.ROOT);
    // Directory of the current page (up to and including the last '/'); town links are
    // resolved against it.
    String myPrix = pagePath.substring(0, pagePath.lastIndexOf("/") + 1);
    String[] rows = content.split("TOWNTR");
    for (int i = 1; i < rows.length; i++) {
        String[] links = rows[i].split("<A HREF='");
        if (links.length < 3) {
            System.err.println("skipping town row " + i + " without two links in " + pagePath);
            continue;
        }
        int urlEnd = links[1].indexOf("'>");
        int codeEnd = links[1].indexOf("</A>");
        int nameStart = links[2].indexOf("'>");
        int nameEnd = links[2].indexOf("</A>");
        if (urlEnd < 0 || codeEnd < urlEnd + 2 || nameStart < 0 || nameEnd < nameStart + 2) {
            System.err.println("skipping unparsable town row " + i + " in " + pagePath);
            continue;
        }
        // First link: target URL and code.
        String townUrl = links[1].substring(0, urlEnd);
        String townCode = links[1].substring(urlEnd + 2, codeEnd);
        System.out.println(townCode);
        // Second link: display name.
        String towntrName = areaName + links[2].substring(nameStart + 2, nameEnd);
        System.out.println("获取所属乡镇(街道办事处):" + towntrName);
        readCun(myPrix, townUrl, towntrName);
    }
}
/**
 * Reads the village-level rows of one town page and prints them.
 *
 * <p>The page at {@code baseUrl + prix + url} is upper-cased and split on
 * {@code VILLAGETR}; the first chunk is ignored. Each row is split on {@code <TD>} and the
 * first three cells are read in order: village code, urban-rural classification code
 * ({@code cxfldm}) and village name. Rows with fewer than three cells, or with a cell
 * that is not closed by {@code </TD>}, are reported on stderr and skipped.
 *
 * @param prix       directory prefix of the town page
 * @param url        town page path relative to {@code prix}
 * @param towntrName accumulated name down to town level, used as prefix for printed names
 * @throws Exception if the page cannot be downloaded
 */
public static void readCun(String prix, String url, String towntrName) throws Exception {
    // Locale.ROOT: with a Turkish/Azeri default locale "villagetr" would become
    // "VİLLAGETR" and the split below would never match.
    String content = getContent(baseUrl + prix + url).toUpperCase(Locale.ROOT);
    String[] rows = content.split("VILLAGETR");
    for (int i = 1; i < rows.length; i++) {
        // cells[0] is the text before the first <TD>; the data cells start at index 1.
        String[] cells = rows[i].split("<TD>");
        if (cells.length < 4) {
            System.err.println("skipping village row " + i + " with too few cells in " + prix + url);
            continue;
        }
        int codeEnd = cells[1].indexOf("</TD>");
        int classEnd = cells[2].indexOf("</TD>");
        int nameEnd = cells[3].indexOf("</TD>");
        if (codeEnd < 0 || classEnd < 0 || nameEnd < 0) {
            System.err.println("skipping unparsable village row " + i + " in " + prix + url);
            continue;
        }
        // Village-level code.
        String villageCode = cells[1].substring(0, codeEnd);
        // Urban-rural classification code.
        String cxfldm = cells[2].substring(0, classEnd);
        // Village-level name, prefixed with the names of all higher levels.
        String cunName = towntrName + cells[3].substring(0, nameEnd);
        System.out.println("villageCode:" + villageCode + "***cxfldm:" + cxfldm);
        System.out.println("所属村数据:" + cunName);
    }
}
/**
 * Configures a JVM-wide HTTP(S) proxy through system properties.
 *
 * <p>Sets the proxy host and port for both the {@code http} and the {@code https}
 * protocol handler, so requests are proxied regardless of the scheme of the base URL.
 * The keys {@code http.proxyType} and {@code http.proxySet} are still written because
 * the previous version wrote them.
 * NOTE(review): those two keys do not appear among the JDK's documented networking
 * properties — confirm whether anything still reads them.
 *
 * @param host proxy host name or IP address
 * @param port proxy port, as a decimal string
 */
public static void initProxy(String host, String port) {
    System.setProperty("http.proxyHost", host);
    System.setProperty("http.proxyPort", port);
    // The http.* keys are only consulted for plain-HTTP URLs.
    System.setProperty("https.proxyHost", host);
    System.setProperty("https.proxyPort", port);
    // Legacy keys, kept for backward compatibility.
    System.setProperty("http.proxyType", "4");
    System.setProperty("http.proxySet", "true");
}
/**
 * Downloads a page and returns its text with all line terminators removed.
 *
 * <p>A cookie-accepting {@link CookieManager} is installed as the JVM default once, if no
 * default {@link CookieHandler} exists yet, and is then reused so cookies persist across
 * requests. Per the original author's note, without cookie handling the server keeps
 * redirecting a request to its own address until the JDK gives up with
 * {@code java.net.ProtocolException: Server redirected too many times (20)}.
 *
 * <p>The previous version configured an {@code HttpURLConnection} (disabling redirects)
 * but then read from {@code url.openStream()}, which opens a separate connection, so that
 * configuration never took effect. The page is now read from the one connection that is
 * opened, and redirects are followed exactly as they effectively were before.
 *
 * @param strUrl absolute URL of the page
 * @return the page content decoded with {@code CHARSET}, lines concatenated without separators
 * @throws Exception if the URL is malformed or the page cannot be read; the failing URL
 *                   is printed before the exception is rethrown
 */
public static String getContent(String strUrl) throws Exception {
    try {
        // Install the cookie store once; replacing it on every call would drop the
        // cookies collected by earlier requests.
        if (CookieHandler.getDefault() == null) {
            CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
        }
        URL url = new URL(strUrl);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        // try-with-resources closes the reader (and the underlying stream) even when
        // reading fails half-way.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), Charset.forName(CHARSET)))) {
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                // readLine() strips the terminator; the parsers rely on one unbroken string.
                sb.append(line);
            }
            return sb.toString();
        }
    } catch (Exception e) {
        System.out.println("can't open url:" + strUrl);
        throw e;
    }
}
}