【Java爬蟲】Jsoup基本使用

直接上代碼

/**
 * Minimal jsoup demo: downloads a page with Apache HttpClient, parses the
 * HTML with jsoup, and prints the elements matched by a CSS selector.
 */
public class JsoupDemo {

    /**
     * Fetches {@code http://www.cnblogs.com}, prints the HTTP status code, then
     * prints the inner HTML and the {@code href} attribute of every element
     * matched by the post-title selector.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the request fails or the response body cannot be read
     */
    public static void main(String[] args) throws IOException {
        String url = "http://www.cnblogs.com";
        HttpGet httpGet = new HttpGet(url);
        httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36");

        // try-with-resources closes the response first and the client second
        // (reverse declaration order), even when the request, the body read,
        // or the parsing throws. The original only closed them on success.
        try (CloseableHttpClient client = HttpClients.createDefault();
             CloseableHttpResponse response = client.execute(httpGet)) {

            // Read the response entity (body) as a UTF-8 string.
            HttpEntity entity = response.getEntity();
            String content = EntityUtils.toString(entity, "utf-8");
            System.out.println("status:" + response.getStatusLine().getStatusCode());

            Document dom = Jsoup.parse(content);

            // Alternative lookup by tag name, e.g. dom.getElementsByTag("title"),
            // then print each element's text().

            // Find all post titles via a CSS selector.
            Elements elem = dom.select("#post_list .post_item .post_item_body h3 a");
            for (Element e : elem) {
                System.out.println(e.html());
                System.out.println(e.attr("href")); // value of the href attribute
            }

            // Selects every <img> whose src ends with ".png"; the result is
            // discarded here, the call only illustrates the selector syntax.
            dom.select("img[src$=.png]");
        }
    }
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章