直接上代碼
/**
 * Minimal demo: fetch a web page with Apache HttpClient and query the
 * returned HTML with jsoup CSS selectors.
 */
public class JsoupDemo {

    /**
     * Downloads the page at a fixed URL, prints the HTTP status code, then
     * prints the inner HTML and {@code href} of every anchor matched by the
     * post-title selector.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the request fails or the response body cannot be read
     */
    public static void main(String[] args) throws IOException {
        String url = "http://www.cnblogs.com";
        HttpGet httpGet = new HttpGet(url);
        // Send a browser-like User-Agent with the request.
        httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36");

        // try-with-resources guarantees that both the response and the client
        // are closed even when the request or the body read throws; the
        // response is closed first, then the client.
        try (CloseableHttpClient client = HttpClients.createDefault();
             CloseableHttpResponse response = client.execute(httpGet)) {

            // Get the response entity; it can be null when the response has no body.
            HttpEntity entity = response.getEntity();
            String content = (entity == null) ? "" : EntityUtils.toString(entity, "utf-8");
            System.out.println("status:" + response.getStatusLine().getStatusCode());

            Document dom = Jsoup.parse(content);

            // Find all post titles through a CSS selector.
            Elements titleLinks = dom.select("#post_list .post_item .post_item_body h3 a");
            for (Element link : titleLinks) {
                System.out.println(link.html());
                System.out.println(link.attr("href")); // value of the href attribute
            }

            // Selector example: all images whose src ends with ".png".
            // The result is intentionally unused in this demo.
            dom.select("img[src$=.png]");
        }
    }
}