您好,登錄后才能下訂單哦!
今天小編給大家分享一下Java如何實現獲取行政區劃的相關知識點,內容詳細,邏輯清晰,相信大部分人都還不太了解這方面的知識,所以分享這篇文章給大家參考一下,希望大家閱讀完這篇文章後有所收獲,下面我們一起來了解一下吧。
下面是筆者用到的全部jar包
<!-- Apache POI OOXML: provides the org.apache.poi.xssf.streaming (SXSSF) classes the scraper imports -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.9</version>
</dependency>
<dependency>
    <groupId>com.google.guava</groupId>
    <artifactId>guava</artifactId>
    <version>30.1.1-jre</version>
</dependency>
<dependency>
    <groupId>cn.hutool</groupId>
    <artifactId>hutool-json</artifactId>
    <version>5.4.0</version>
</dependency>
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.44</version>
</dependency>
<!-- jsoup: provides the org.jsoup classes the scraper imports for fetching and parsing HTML -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.14.3</version>
</dependency>
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.5</version>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-web</artifactId>
    <version>2.5.4</version>
</dependency>
這里說下,maven配置要從阿里云下載jar,若是從中央倉庫下載將會非常的慢。
這里是代碼的展示,筆者是在網上搜的代碼基礎上改造的;若不改造,網站有反爬,大概爬取2000條左右就會中斷,筆者加了延時,這樣就避開了反爬(可能還有別的規避措施)。這里爬取的是4級行政區劃:省、市、區縣、街道。
package com.cheng.controller;

import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.ConnectException;
import java.net.SocketTimeoutException;
import java.util.*;

/**
 * Scrapes four levels of administrative divisions (province, city, county, town/street)
 * from the pages under {@link #url1} and writes one {@code .xlsx} workbook per province.
 *
 * <p>Every workbook has a header row ({@code id, district_name, pid, type}) followed by one
 * row per division. {@code pid} is the code of the parent division and {@code type} is the
 * level: "1" province, "2" city, "3" county, "4" town/street.
 *
 * <p>The class keeps its cursor in static fields and is not safe for concurrent use.
 *
 * @author pcc
 * @version 1.0.0
 * @className JsoupTest
 * @date 2023-03-02 10:39
 */
public class JsoupTestPluMdm {

    /** Index of the next data row to create in the sheet currently being filled (row 0 is the header). */
    static int i = 1;

    /** Base URL of the yearly division-code listing. */
    static String url1 = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2022";

    /** URL of the province page most recently visited, without its ".html" suffix. */
    static String url2 = "";

    /** Selector for the table body that holds the listing on every page level. */
    private static final String TABLE_BODY_SELECTOR =
            "body > table:nth-child(3) > tbody > tr:nth-child(1) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td > table > tbody";

    /** Cookie sent with the request for the index page. */
    private static final String INDEX_COOKIE =
            "wzws_sessionid=oGQAAyWBMmNlMWZkgjdlZDJkMIAyMjEuMjM4LjEzMi41MA==; SF_cookie_1=15502425; wzws_cid=6e8cdc0aea81349b05c8a0b6c05cd7204b6e0f10e5a48d462175473d23abcb4891edf1ceb73464398cb1ce7e6f53999f7545dd0014a15b1fb4eec5c6cf37421f0c2b08528de36f728ec4c676ed264c7d";

    /** Directory used when no output directory is given on the command line. */
    private static final String DEFAULT_OUTPUT_DIR = "C:\\Users\\pcc\\Desktop\\xingzhengquhua\\";

    /** The first two cells of every listing table are the column captions (code, name). */
    private static final int HEADER_CELLS = 2;

    /** Number of rows kept in memory by the streaming workbook before older rows are flushed. */
    private static final int ROW_WINDOW = 100;

    /** A pause is inserted after this many rows have been written, to stay under the site's anti-crawler limit. */
    private static final int ROWS_PER_PAUSE = 1000;

    /** Length of the anti-crawler pause: ten minutes. */
    private static final long PAUSE_MILLIS = 1000L * 60 * 10;

    /** How many times a page download is attempted before the failure is propagated. */
    private static final int MAX_FETCH_ATTEMPTS = 3;

    /** Rows written during the whole run, across all workbooks; drives the anti-crawler pause. */
    private static long totalRows = 0;

    /** One line of a listing table: division code, division name and the link to its child page. */
    private static final class Entry {
        final String code;
        final String name;
        /** Relative link to the child page, or the empty string when the entry is not a hyperlink. */
        final String href;

        Entry(String code, String name, String href) {
            this.code = code;
            this.name = name;
            this.href = href;
        }
    }

    /**
     * Downloads the index page, lists all provinces and exports each province to its own workbook.
     *
     * @param args optional; {@code args[0]} is the output directory. When absent, the
     *             built-in default directory is used.
     * @throws IOException declared for compatibility; failures are reported on standard error
     */
    public static void main(String[] args) throws IOException {
        String outputDir = (args != null && args.length > 0) ? args[0] : DEFAULT_OUTPUT_DIR;
        try {
            Document document = Jsoup.connect(url1)
                    .header("Cookie", INDEX_COOKIE)
                    .get();
            // On the index page every province is a hyperlink such as "11.html".
            Elements provinceLinks = document.select(TABLE_BODY_SELECTOR).select("tbody > tr > td > a");
            List<Entry> provinces = new ArrayList<>();
            for (int j = 0; j < provinceLinks.size(); j++) {
                String href = provinceLinks.get(j).attr("href");
                // Literal replace: the dot must not be treated as a regex wildcard.
                String provinceCode = href.replace(".html", "") + "0000";
                System.out.println("省代碼:" + provinceCode);
                String provinceName = provinceLinks.get(j).text();
                System.out.println("省名稱:" + provinceName);
                provinces.add(new Entry(provinceCode, provinceName, href));
            }
            // Iterate over what was actually found instead of assuming exactly 31 provinces.
            for (Entry province : provinces) {
                exportProvince(province.code, province.name, outputDir);
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Scrapes one province with all its descendants and writes them to "&lt;provinceName&gt;.xlsx" in outputDir. */
    private static void exportProvince(String provinceCode, String provinceName, String outputDir) throws Exception {
        String index = provinceCode.substring(0, 2) + ".html";
        File target = new File(outputDir, provinceName + ".xlsx");
        SXSSFWorkbook wb = new SXSSFWorkbook(ROW_WINDOW);
        // Opening the file first makes an unusable output directory fail before any scraping is done.
        try (FileOutputStream fileOut = new FileOutputStream(target)) {
            SXSSFSheet sheet = (SXSSFSheet) wb.createSheet();
            // The header must be filled in before any data row: once more than ROW_WINDOW rows
            // exist, row 0 has been flushed to disk and later changes to it are lost.
            Row header = sheet.createRow(0);
            header.createCell(0).setCellValue("id");
            header.createCell(1).setCellValue("district_name");
            header.createCell(2).setCellValue("pid");
            header.createCell(3).setCellValue("type");
            // Each workbook starts its data directly below its own header.
            i = 1;
            appendRow(sheet, provinceCode, provinceName, "", "1");
            jsoupList2(url1 + "/" + index, provinceName, provinceCode, sheet);
            wb.write(fileOut);
        } finally {
            // Removes the temporary files the streaming workbook keeps on disk.
            wb.dispose();
        }
    }

    /**
     * City-level page: writes every city of a province and descends into each linked city.
     *
     * @param url          absolute URL of the province page
     * @param provinceName name of the province, passed on to the lower levels
     * @param provinceCode code of the province; written as {@code pid} of each city
     * @param sheet        sheet that receives the rows
     * @throws Exception if a page cannot be downloaded or the thread is interrupted
     */
    public static void jsoupList2(String url, String provinceName, String provinceCode, SXSSFSheet sheet) throws Exception {
        url2 = url.replace(".html", "");
        for (Entry city : readEntries(fetch(url))) {
            System.out.println("市代碼:" + city.code);
            System.out.println("市名稱:" + city.name);
            // Linked and unlinked cities are both level "2"; the original tagged unlinked ones "3".
            appendRow(sheet, city.code, city.name, provinceCode, "2");
            if (!city.href.isEmpty()) {
                // City links are relative to the base URL, e.g. "11/1101.html".
                jsoupList3(url1 + "/" + city.href, city.name, city.code, provinceName, provinceCode, sheet);
            }
        }
    }

    /**
     * County-level page: writes every county of a city and descends into each linked county.
     *
     * @param url          absolute URL of the city page
     * @param cityName     name of the city, passed on to the lower level
     * @param cityCode     code of the city; written as {@code pid} of each county
     * @param provinceName name of the province, passed on to the lower level
     * @param provinceCode code of the province; its first two digits are part of the child URLs
     * @param sheet        sheet that receives the rows
     * @throws Exception if a page cannot be downloaded or the thread is interrupted
     */
    public static void jsoupList3(String url, String cityName, String cityCode, String provinceName, String provinceCode, SXSSFSheet sheet) throws Exception {
        for (Entry county : readEntries(fetch(url))) {
            System.out.println("縣代碼:" + county.code);
            System.out.println("縣名稱:" + county.name);
            appendRow(sheet, county.code, county.name, cityCode, "3");
            if (!county.href.isEmpty()) {
                // County links are relative to the province directory, e.g. "01/110101.html".
                String townUrl = url1 + "/" + provinceCode.substring(0, 2) + "/" + county.href;
                jsoupList4(townUrl, county.name, county.code, cityName, cityCode, provinceName, provinceCode, sheet);
            }
        }
    }

    /**
     * Town/street-level page: writes every town or street of a county. This is the last level visited.
     *
     * @param url          absolute URL of the county page
     * @param xianName     name of the county (unused, kept for signature compatibility)
     * @param xianCode     code of the county; written as {@code pid} of each town/street
     * @param cityName     name of the city (unused)
     * @param cityCode     code of the city (unused)
     * @param provinceName name of the province (unused)
     * @param provinceCode code of the province (unused)
     * @param sheet        sheet that receives the rows
     * @throws Exception if the page cannot be downloaded or the thread is interrupted
     */
    public static void jsoupList4(String url, String xianName, String xianCode, String cityName, String cityCode, String provinceName, String provinceCode, SXSSFSheet sheet) throws Exception {
        for (Entry town : readEntries(fetch(url))) {
            // The code is written in full; truncating it would lose street-level digits.
            System.out.println("街道代碼:" + town.code);
            System.out.println("街道名稱:" + town.name);
            appendRow(sheet, town.code, town.name, xianCode, "4");
        }
    }

    /** Downloads a page, retrying only the download itself on timeout or refused connection. */
    private static Document fetch(String url) throws IOException {
        IOException lastFailure = null;
        for (int attempt = 1; attempt <= MAX_FETCH_ATTEMPTS; attempt++) {
            try {
                return Jsoup.connect(url).get();
            } catch (SocketTimeoutException | ConnectException e) {
                // Retrying here (rather than around the whole page handler) avoids writing
                // the rows of a partially processed page a second time.
                lastFailure = e;
                e.printStackTrace();
            }
        }
        throw lastFailure;
    }

    /** Reads the (code, name) cell pairs of a listing table, skipping the caption cells. */
    private static List<Entry> readEntries(Document document) {
        Elements cells = document.select(TABLE_BODY_SELECTOR).select("tbody > tr > td");
        List<Entry> entries = new ArrayList<>();
        // Cells alternate code, name; a trailing unpaired cell is ignored.
        for (int j = HEADER_CELLS; j + 1 < cells.size(); j += 2) {
            Elements codeLinks = cells.get(j).select("td > a");
            Elements nameLinks = cells.get(j + 1).select("td > a");
            // Entries without children are plain text instead of hyperlinks; they are kept too.
            String code = codeLinks.isEmpty() ? cells.get(j).text() : codeLinks.first().text();
            String name = nameLinks.isEmpty() ? cells.get(j + 1).text() : nameLinks.first().text();
            String href = nameLinks.isEmpty() ? "" : nameLinks.first().attr("href");
            entries.add(new Entry(code, name, href));
        }
        return entries;
    }

    /** Writes one data row at the cursor, advances the cursor and pauses after every ROWS_PER_PAUSE rows. */
    private static void appendRow(SXSSFSheet sheet, String id, String name, String pid, String type) throws InterruptedException {
        Row row = sheet.createRow(i);
        row.createCell(0).setCellValue(id);
        row.createCell(1).setCellValue(name);
        row.createCell(2).setCellValue(pid);
        row.createCell(3).setCellValue(type);
        i++;
        totalRows++;
        System.out.println("**********************i********************:" + i);
        // Counting completed rows (not table cells) makes the pause fire exactly once per threshold.
        if (totalRows % ROWS_PER_PAUSE == 0) {
            Thread.sleep(PAUSE_MILLIS);
        }
    }
}
以上就是“Java如何實現獲取行政區劃”這篇文章的所有內容,感謝各位的閱讀!相信大家閱讀完這篇文章都有很大的收獲,小編每天都會為大家更新不同的知識,如果還想學習更多的知識,請關注億速云行業資訊頻道。
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。