Jsoup GET POST爬取數據

1 依賴

<dependency>
  <!-- jsoup HTML parser library @ https://jsoup.org/ -->
  <groupId>org.jsoup</groupId>
  <artifactId>jsoup</artifactId>
  <version>1.13.1</version>
</dependency>

2 GET 請求

2.1 HTML

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

...

/**
 * Fetches a page with an HTTP GET and turns the text of selected elements into JSON.
 *
 * <p>The combined text of all elements matched by {@code getElementsByClass("標籤名")} is
 * passed to {@code changeJson} (a helper defined elsewhere). Any exception raised along
 * the way is printed via {@code printStackTrace()} and reported to the caller as
 * {@code null}.
 *
 * @param paramUrl address of the page to request
 * @return whatever {@code changeJson} produces, or {@code null} when an exception occurred
 */
public JSONObject doGet(String paramUrl) {
    JSONObject result = null;
    try {
        Connection conn = Jsoup.connect(paramUrl);
        conn.ignoreContentType(true);
        conn.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1295.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.5 WindowsWechat");
        conn.timeout(10000);

        Document page = conn.get();
        String extractedText = page.getElementsByClass("標籤名").text();
        result = changeJson(extractedText);  // convert to JSON data
    } catch (Exception e) {
        e.printStackTrace();
    }
    return result;
}

2.2 JSON

import com.alibaba.fastjson.JSONObject;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;

...

/**
 * Sends an HTTP GET request and parses the response body as a JSON object.
 *
 * <p>Only a response with status code 200 is parsed; any other status yields {@code null}.
 * Exceptions are printed via {@code printStackTrace()} and also yield {@code null}.
 *
 * @param url address to request
 * @return the parsed body, or {@code null} on a non-200 status or on any exception
 */
public JSONObject doGet(String url) {
    try {
        Response res = Jsoup.connect(url)
                .ignoreContentType(true)
                .header("Content-type", "application/x-www-form-urlencoded; charset=UTF-8")
                .header("x-forwarded-for", IpUtils.getRandomIp())  // IpUtils is a self-written utility class
                .method(Method.GET)
                .timeout(10000)
                .execute();

        if (res.statusCode() != 200) {
            return null;
        }
        return JSONObject.parseObject(res.body());
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}

3 POST 請求

3.1 application/x-www-form-urlencoded

import com.alibaba.fastjson.JSONObject;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;

...

/**
 * Sends an HTTP POST with form fields and parses the response body as a JSON object.
 *
 * <p>The entries of {@code param} are attached with {@code data(...)} and the request is
 * labelled {@code application/x-www-form-urlencoded}. Only a status code of 200 is
 * parsed. Exceptions are printed via {@code printStackTrace()}.
 *
 * @param param form field names and values to submit
 * @param url address to post to
 * @return the parsed body, or {@code null} on a non-200 status or on any exception
 */
public JSONObject doPost(Map<String, String> param, String url) {
    try {
        Response res = Jsoup.connect(url)
                .ignoreContentType(true)
                .header("Content-type", "application/x-www-form-urlencoded")
                .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36")
                .header("x-forwarded-for", IpUtils.getRandomIp())  // IpUtils is a self-written utility class
                .method(Method.POST)
                .data(param)
                .timeout(25000)
                .execute();

        return res.statusCode() == 200 ? JSONObject.parseObject(res.body()) : null;
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}

3.2 text/plain

import com.alibaba.fastjson.JSONObject;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;

...

/**
 * Sends an HTTP POST with a raw {@code text/plain} body and parses the response body as a
 * JSON object.
 *
 * <p>Only a status code of 200 is parsed. Exceptions are printed via
 * {@code printStackTrace()}.
 *
 * @param param request body; data format: {@code key1=value1&key2=value2}
 * @param url address to post to
 * @return the parsed body, or {@code null} on a non-200 status or on any exception
 */
public JSONObject doPost(String param, String url) {
    try {
        Response res = Jsoup.connect(url)
                .ignoreContentType(true)
                .header("Content-type", "text/plain")
                .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36")
                .header("x-forwarded-for", IpUtils.getRandomIp())  // IpUtils is a self-written utility class
                .method(Method.POST)
                .requestBody(param)
                .timeout(15000)
                .execute();

        if (res.statusCode() != 200) {
            return null;
        }
        String payload = res.body();
        return JSONObject.parseObject(payload);
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}

3.3 application/json

import com.alibaba.fastjson.JSONObject;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;

...

public JSONObject doPost(String paramJsonStr, Proxy proxy, url) {
        try {
            Response response = Jsoup.connect(url)
                    				 .ignoreContentType(true)
                    				 .header("Content-Type", "application/json;charset=UTF-8")
                    				 .header("User-Agent", "Mozilla/5.0 (Linux; Android 5.1.1; sm-j200g Build/LMY48Z) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/39.0.0.0 Mobile Safari/537.36")
                   					 .requestBody(paramJsonStr)
                  					 .proxy(proxy)  // 代理 IP
                    				 .method(Method.POST)
                    				 .timeout(10000)
                    				 .execute();
            if (response.statusCode() == 200) {
                String bodyStr = response.body();
                return JSONObject.parseObject(bodyStr);
            }
        }
        catch (Exception e) {
			e.printStackTrace();
        }
        return null;
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章