// jsoup爬取某网站安全数据 (Crawling a website's security data with jsoup)
package com.vfsd.net; import java.io.IOException;
import java.sql.SQLException;
import java.util.Map; import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import com.vfsd.dao.ManageMySQL; /**
* Servlet implementation class GetURL13
*/
@WebServlet("/GetURL13")
public class GetURL13 extends HttpServlet {
    private static final long serialVersionUID = 1L;

    // Overview: a GET or POST to /GetURL13 walks the link table page by page,
    // downloads every link that ends in "html", extracts the text of the
    // "#content_list" element and writes it back through ManageMySQL.
    // Nothing is written to the HTTP response; progress goes to stdout and
    // failures go to the servlet log.

    /** Browser-like User-Agent header sent with every page fetch. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36";

    /** Table name passed to ManageMySQL both for reading links and for storing the text. */
    private static final String TABLE_NAME = "data_bjszfhcxjswyh";

    /** Number of links requested from ManageMySQL per page. */
    private static final int PAGE_SIZE = 10;

    /** Last page processed; pages 1..LAST_PAGE (inclusive) are visited. */
    private static final int LAST_PAGE = 923;

    /** CSS selector of the element whose text is stored. */
    private static final String CONTENT_SELECTOR = "#content_list";

    /** Greeting printed once when the servlet is initialised. */
    private String message;

    /**
     * @see HttpServlet#HttpServlet()
     */
    public GetURL13() {
        super();
    }

    /**
     * Prints a start-up banner and asks ManageMySQL for a connection.
     *
     * <p>A connection failure is logged but does not prevent the servlet from
     * being put into service (best-effort, as before).
     *
     * @throws ServletException never thrown by this implementation; declared by the overridden method
     */
    @Override
    public void init() throws ServletException {
        message = "Hello world, this message is from servlet!";
        System.out.println("------" + message);
        try {
            ManageMySQL.getConnection();
        } catch (Exception e) {
            // Covers SQLException as well; both cases were handled identically.
            log("GetURL13: could not obtain a database connection during init", e);
        }
    }

    /**
     * Runs the whole crawl: for every page of links, fetch each link and store
     * the extracted text.
     *
     * <p>Failures are isolated: an error while loading one page of links skips
     * that page only, and an error while fetching or storing one link skips
     * that link only.
     *
     * @param request  the incoming request (not inspected)
     * @param response the response (nothing is written to it)
     * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
     */
    @Override
    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        for (int pageNum = 1; pageNum <= LAST_PAGE; pageNum++) {
            // First argument handed to getNewsLinkInTable; presumably a row
            // offset — verify against ManageMySQL.
            int start = (pageNum - 1) * PAGE_SIZE;

            Map<Integer, String> links;
            try {
                links = ManageMySQL.getNewsLinkInTable(start, PAGE_SIZE, TABLE_NAME);
            } catch (Exception e) {
                log("GetURL13: failed to load links for page " + pageNum + " (start=" + start + ")", e);
                continue;
            }
            if (links == null) {
                continue;
            }

            for (Map.Entry<Integer, String> entry : links.entrySet()) {
                Integer id = entry.getKey();
                String link = entry.getValue();
                System.out.println(id + " " + link);
                try {
                    scrapeAndStore(id, link);
                } catch (Exception e) {
                    // One bad link must not abort the rest of the page.
                    log("GetURL13: failed to process id=" + id + " link=" + link, e);
                }
            }
        }
    }

    /**
     * Fetches one link and, when the page contains exactly one
     * {@code #content_list} element, stores that element's text.
     *
     * <p>Links that are {@code null}, contain {@code "void"} or do not end with
     * {@code "html"} are skipped without any network access.
     *
     * @param id   key of the row, as returned by ManageMySQL
     * @param link URL to download
     * @throws Exception on any fetch or database failure; the caller logs it
     */
    private void scrapeAndStore(Integer id, String link) throws Exception {
        if (link == null) {
            return;
        }
        // The "void" guard existed in the original but had been swallowed by
        // a preceding line comment; it is active again here.
        if (link.contains("void") || !link.endsWith("html")) {
            return;
        }

        Document page = Jsoup.connect(link).userAgent(USER_AGENT).get();
        Elements containers = page.select(CONTENT_SELECTOR);
        if (containers.size() != 1) {
            return;
        }

        Element container = containers.get(0);
        // NOTE(review): quotes are stripped before storing, presumably because
        // ManageMySQL builds its SQL by string concatenation — confirm, and
        // prefer a PreparedStatement there so the text can be stored intact.
        String text = container.text().replace("'", "").replace("\"", "");

        // Source and publish date are not extracted from the page; empty strings are stored.
        ManageMySQL.updateContextAndPublishDate2(id, text, "", "", TABLE_NAME);
    }

    /**
     * Delegates to {@link #doGet(HttpServletRequest, HttpServletResponse)}.
     *
     * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
     */
    @Override
    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        doGet(request, response);
    }
}