Java使用URL獲取網頁內容

使用 URLConnection 發送 GET 請求來獲取網頁的內容。如果服務器返回的是 CSS、JS 等代碼,則需要用正則表達式從返回內容中提取所需要的結果。

package Get;

import Post.PostMethod;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;

/**
 * 這是一個get方法,當客戶端需要通過get 方法獲取鏈接的時候就要採用這個方法
 * Created by syb on 2016/10/29.
 */
public class GetMethod {
    /**
     * 測試get 方法是否可用採用http://news.sina.com.cn/
     *
     * @param args
     */
    public static void main(String[] args) {
       /* String urlStr = "http://www.kuwo.cn/static/js/common/player/player.js?v20161009.js";
        System.out.println(sendGet(urlStr));*/
        /**
         * 採用正則表達式獲取所響應的鏈接
         */
        //region 測試get方法
       /* String res = sendGet(urlStr);
        String regex = "http:.{4}(\\w*.){4}(.\\w*){10}.shtml";
        Pattern pattern = Pattern.compile(regex);
        Matcher m = pattern.matcher(res);
        while (m.find()){
            System.out.println(m.group());
        }*/

      //  System.out.println(res);
        //endregion
    }

    /**
     * 這是一個get方法,如果網頁需要用到get來訪問源碼,就應該用此方法
     *
     * @param url 需要提供一個網頁的鏈接
     * @return 返回服務器所響應的內容
     */
    public static String sendGet(String url) {
        String result = "";
        BufferedReader in = null;
        String strUrlName = url;
        try {
            URL readUrl = new URL(strUrlName);
            URLConnection urlConection = readUrl.openConnection();
            //設置屬性
         //   urlConection.setRequestProperty("Accept", "***/*//*//**//*");
           /* urlConection.setRequestProperty("Connection", "keep-alive");
            urlConection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0");*/
            PostMethod p = new PostMethod();
            urlConection = p.urlConnectionSetParams(readUrl);
            urlConection.connect();
            in = new BufferedReader(new InputStreamReader(urlConection.getInputStream(), "utf-8"));
            //讀取響應
            //處理響應
            String line = null;
            while ((line = in.readLine()) != null) {
                result += line;
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (in != null) {
                    in.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return result;
    }
}

發送 POST 請求來獲取內容(以下方法位於上文 GetMethod 所引用的 PostMethod 類中)

/**
 * Sends a POST request to the given address and returns the response body.
 *
 * <p>The request body is built by joining the given parameters with
 * {@code '&'}, so both {@code sendPost(url, "a=1&b=2")} and
 * {@code sendPost(url, "a=1", "b=2")} send {@code a=1&b=2}. The parameters are
 * written as given; any URL-encoding is the caller's responsibility.
 *
 * <p>The response is decoded as UTF-8 and read line by line; the line
 * terminators are dropped. Failures are reported with
 * {@code printStackTrace()} and are not propagated.
 *
 * @param url    address to send the request to
 * @param params request-body fragments (typically {@code name=value} pairs);
 *               may be empty or {@code null}, and {@code null} elements are skipped
 * @return the response body without line terminators; an empty string when
 *         the URL is malformed, the connection cannot be created, or nothing
 *         could be read
 */
public static String sendPost(String url, String ...params) {
    // Starts empty (not null) so the response is never prefixed with the text "null".
    StringBuilder result = new StringBuilder();
    PrintWriter out = null; // writes the request body
    BufferedReader in = null; // reads the response
    try {
        URL realUrl = new URL(url);
        URLConnection urlConnection = urlConnectionSetParams(realUrl);
        if (urlConnection == null) {
            // The helper already reported the failure; avoid a NullPointerException.
            return result.toString();
        }
        urlConnection.setDoInput(true);
        urlConnection.setDoOutput(true); // required before a request body can be written
        urlConnection.connect();

        // Join the fragments explicitly: printing the String[] itself would send
        // its identity string ("[Ljava.lang.String;@...") instead of the parameters.
        StringBuilder body = new StringBuilder();
        if (params != null) {
            for (String param : params) {
                if (param == null) {
                    continue;
                }
                if (body.length() > 0) {
                    body.append('&');
                }
                body.append(param);
            }
        }

        // Send the request body.
        out = new PrintWriter(urlConnection.getOutputStream());
        out.print(body);
        out.flush();

        // The request has been sent; now read the response.
        in = new BufferedReader(new InputStreamReader(urlConnection.getInputStream(), "utf-8"));
        String line;
        while ((line = in.readLine()) != null) {
            result.append(line);
        }
    } catch (MalformedURLException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close the writer first: PrintWriter.close() never throws, so a failure
        // while closing the reader cannot leave the writer open.
        if (out != null) {
            out.close();
        }
        try {
            if (in != null) {
                in.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return result.toString();
}

/**
 * Opens a connection to the given URL and applies the browser-like request
 * headers shared by the GET and POST helpers.
 *
 * @param rl the URL to open a connection to
 * @return the prepared, not yet connected {@link URLConnection}, or
 *         {@code null} if opening the connection failed with an IOException
 */
public static URLConnection urlConnectionSetParams(URL rl) {
    final URLConnection connection;
    try {
        connection = rl.openConnection();
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }

    // Common request headers imitating a desktop Firefox browser.
    connection.setRequestProperty("accept", "*/*");
    connection.setRequestProperty("connection", "keep-alive");
    connection.setRequestProperty(
            "user-agent",
            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0");
    connection.setRequestProperty("Upgrade-Insecure-Requests", "1");
    // Headers left disabled by the original author:
    //   connection.setRequestProperty("Accept-Encoding", "gzip");
    //   connection.setRequestProperty("Host", "gsxt.xjaic.gov.cn:7001");

    return connection;
}

}

“`
以上僅爲獲取網頁內容的其中一種方法

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章