HttpClient简介
HttpClient是Apache Jakarta Commons下的子项目,用来提供高效的、最新的、功能丰富的支持HTTP协议的客户端编程工具包,并且它支持HTTP协议最新的版本。它的主要功能有:
(1) 实现了所有 HTTP 的方法(GET,POST,PUT,HEAD 等)
(2) 支持自动转向
(3) 支持 HTTPS 协议
(4) 支持代理服务器等
Jsoup简介
jsoup是一款Java的HTML解析器,可直接解析某个URL地址、HTML文本内容。它提供了一套非常省力的API,可通过DOM,CSS以及类似于jQuery的操作方法来取出和操作数据。它的主要功能有:
(1) 从一个URL,文件或字符串中解析HTML;
(2) 使用DOM或CSS选择器来查找、取出数据;
(3) 可操作HTML元素、属性、文本;
使用步骤
代码
import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.junit.Test;
import java.util.List;
/**
-
HttpClient & Jsoup libruary test class
- Created by xuyh at 2017/11/6 15:28.
*/
public classHttpClientJsoupTest{
@Test
public void test() {
//通过httpClient获取网页响应,将返回的响应解析为纯文本
HttpGet httpGet = new HttpGet("http://sports.sina.com.cn/");
httpGet.setConfig(RequestConfig.custom().setSocketTimeout(30000).setConnectTimeout(30000).build());
CloseableHttpClient httpClient = null;
CloseableHttpResponse response = null;
String responseStr = "";
try {
httpClient = HttpClientBuilder.create().build();
HttpClientContext context = HttpClientContext.create();
response = httpClient.execute(httpGet, context);
int state = response.getStatusLine().getStatusCode();
if (state != 200)
responseStr = "";
HttpEntity entity = response.getEntity();
if (entity != null)
responseStr = EntityUtils.toString(entity, "utf-8");
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
if (response != null)
response.close();
if (httpClient != null)
httpClient.close();
} catch (Exception ex) {
ex.printStackTrace();
}
}
if (responseStr == null)
return;
//将解析到的纯文本用Jsoup工具转换成Document文档并进行操作
Document document = Jsoup.parse(responseStr);
List<Element> elements = document.getElementsByAttributeValue("class", "phdnews_txt fr").first()
.getElementsByAttributeValue("class", "phdnews_hdline");
elements.forEach(element -> {
for (Element e : element.getElementsByTag("a")) {
System.out.println(e.attr("href"));
System.out.println(e.text());
}
});
}
详解
新建HttpGet对象,对象将从 http://sports.sina.com.cn/ 这个URL地址获取GET响应,并将socket超时时间和连接超时时间均设置为30000ms。
将HttpClient和Jsoup进行封装,形成一个工具类,内容如下:
import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.CookieStore;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.cookie.Cookie;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.ssl.SSLContextBuilder;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import javax.net.ssl.*;
import java.io.IOException;
import java.security.GeneralSecurityException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
-
Http工具,包含:
-
普通http请求工具(使用httpClient进行http,https请求的发送)
- Created by xuyh at 2017/7/17 19:08.
*/
public classHttpUtils{
/**
- 请求超时时间,默认20000ms
*/
private int timeout = 20000;
/**
- cookie表
*/
private Map<String, String> cookieMap = new HashMap<>();
/**
- 请求编码(处理返回结果),默认UTF-8
*/
private String charset = "UTF-8";
private static HttpUtils httpUtils;
privateHttpUtils(){
}
/**
-
获取实例
*@return
*/
publicstaticHttpUtilsgetInstance(){
if (httpUtils == null)
httpUtils = new HttpUtils();
return httpUtils;
}
/**
- 清空cookieMap
*/
publicvoidinvalidCookieMap(){
cookieMap.clear();
}
publicintgetTimeout(){
return timeout;
}
/**
-
设置请求超时时间
*@paramtimeout
*/
publicvoidsetTimeout(inttimeout){
this.timeout = timeout;
}
publicStringgetCharset(){
return charset;
}
/**
-
设置请求字符编码集
*@paramcharset
*/
publicvoidsetCharset(String charset){
this.charset = charset;
}
/**
-
将网页返回为解析后的文档格式
*@paramhtml
*@return
*@throwsException
*/
publicstaticDocumentparseHtmlToDoc(String html)throwsException{
return removeHtmlSpace(html);
}
privatestaticDocumentremoveHtmlSpace(String str){
Document doc = Jsoup.parse(str);
String result = doc.html().replace(" ", "");
return Jsoup.parse(result);
}
/**
-
执行get请求,返回doc
*@paramurl
*@return
*@throwsException
*/
publicDocumentexecuteGetAsDocument(String url)throwsException{
return parseHtmlToDoc(executeGet(url));
}
/**
-
执行get请求
*@paramurl
*@return
*@throwsException
*/
publicStringexecuteGet(String url)throwsException{
HttpGet httpGet = new HttpGet(url);
httpGet.setHeader("Cookie", convertCookieMapToString(cookieMap));
httpGet.setConfig(RequestConfig.custom().setSocketTimeout(timeout).setConnectTimeout(timeout).build());
CloseableHttpClient httpClient = null;
String str = "";
try {
httpClient = HttpClientBuilder.create().build();
HttpClientContext context = HttpClientContext.create();
CloseableHttpResponse response = httpClient.execute(httpGet, context);
getCookiesFromCookieStore(context.getCookieStore(), cookieMap);
int state = response.getStatusLine().getStatusCode();
if (state == 404) {
str = "";
}
try {
HttpEntity entity = response.getEntity();
if (entity != null) {
str = EntityUtils.toString(entity, charset);
}
} finally {
response.close();
}
} catch (IOException e) {
throw e;
} finally {
try {
if (httpClient != null)
httpClient.close();
} catch (IOException e) {
throw e;
}
}
return str;
}
/**
-
用https执行get请求,返回doc
*@paramurl
*@return
*@throwsException
*/
publicDocumentexecuteGetWithSSLAsDocument(String url)throwsException{
return parseHtmlToDoc(executeGetWithSSL(url));
}
/**
-
用https执行get请求
*@paramurl
*@return
*@throwsException
*/
publicStringexecuteGetWithSSL(String url)throwsException{
HttpGet httpGet = new HttpGet(url);
httpGet.setHeader("Cookie", convertCookieMapToString(cookieMap));
httpGet.setConfig(RequestConfig.custom().setSocketTimeout(timeout).setConnectTimeout(timeout).build());
CloseableHttpClient httpClient = null;
String str = "";
try {
httpClient = createSSLInsecureClient();
HttpClientContext context = HttpClientContext.create();
CloseableHttpResponse response = httpClient.execute(httpGet, context);
getCookiesFromCookieStore(context.getCookieStore(), cookieMap);
int state = response.getStatusLine().getStatusCode();
if (state == 404) {
str = "";
}
try {
HttpEntity entity = response.getEntity();
if (entity != null) {
str = EntityUtils.toString(entity, charset);
}
} finally {
response.close();
}
} catch (IOException e) {
throw e;
} catch (GeneralSecurityException ex) {
throw ex;
} finally {
try {
if (httpClient != null)
httpClient.close();
} catch (IOException e) {
throw e;
}
}
return str;
}
/**
-
执行post请求,返回doc
*@paramurl
*@paramparams
*@return
*@throwsException
*/
publicDocumentexecutePostAsDocument(String url, Map<String, String> params)throwsException{
return parseHtmlToDoc(executePost(url, params));
}
/**
-
执行post请求
*@paramurl
*@paramparams
*@return
*@throwsException
*/
publicStringexecutePost(String url, Map<String, String> params)throwsException{
String reStr = "";
HttpPost httpPost = new HttpPost(url);
httpPost.setConfig(RequestConfig.custom().setSocketTimeout(timeout).setConnectTimeout(timeout).build());
httpPost.setHeader("Cookie", convertCookieMapToString(cookieMap));
List<NameValuePair> paramsRe = new ArrayList<>();
for (String key : params.keySet()) {
paramsRe.add(new BasicNameValuePair(key, params.get(key)));
}
CloseableHttpClient httpclient = HttpClientBuilder.create().build();
CloseableHttpResponse response;
try {
httpPost.setEntity(new UrlEncodedFormEntity(paramsRe));
HttpClientContext context = HttpClientContext.create();
response = httpclient.execute(httpPost, context);
getCookiesFromCookieStore(context.getCookieStore(), cookieMap);
HttpEntity entity = response.getEntity();
reStr = EntityUtils.toString(entity, charset);
} catch (IOException e) {
throw e;
} finally {
httpPost.releaseConnection();
}
return reStr;
}
/**
-
用https执行post请求,返回doc
*@paramurl
*@paramparams
*@return
*@throwsException
*/
publicDocumentexecutePostWithSSLAsDocument(String url, Map<String, String> params)throwsException{
return parseHtmlToDoc(executePostWithSSL(url, params));
}
/**
-
用https执行post请求
*@paramurl
*@paramparams
*@return
*@throwsException
*/
publicStringexecutePostWithSSL(String url, Map<String, String> params)throwsException{
String re = "";
HttpPost post = new HttpPost(url);
List<NameValuePair> paramsRe = new ArrayList<>();
for (String key : params.keySet()) {
paramsRe.add(new BasicNameValuePair(key, params.get(key)));
}
post.setHeader("Cookie", convertCookieMapToString(cookieMap));
post.setConfig(RequestConfig.custom().setSocketTimeout(timeout).setConnectTimeout(timeout).build());
CloseableHttpResponse response;
try {
CloseableHttpClient httpClientRe = createSSLInsecureClient();
HttpClientContext contextRe = HttpClientContext.create();
post.setEntity(new UrlEncodedFormEntity(paramsRe));
response = httpClientRe.execute(post, contextRe);
HttpEntity entity = response.getEntity();
if (entity != null) {
re = EntityUtils.toString(entity, charset);
}
getCookiesFromCookieStore(contextRe.getCookieStore(), cookieMap);
} catch (Exception e) {
throw e;
}
return re;
}
/**
-
发送JSON格式body的POST请求
*@paramurl 地址
*@paramjsonBody json body
*@return
*@throwsException
*/
publicStringexecutePostWithJson(String url, String jsonBody)throwsException{
String reStr = "";
HttpPost httpPost = new HttpPost(url);
httpPost.setConfig(RequestConfig.custom().setSocketTimeout(timeout).setConnectTimeout(timeout).build());
httpPost.setHeader("Cookie", convertCookieMapToString(cookieMap));
CloseableHttpClient httpclient = HttpClientBuilder.create().build();
CloseableHttpResponse response;
try {
httpPost.setEntity(new StringEntity(jsonBody, ContentType.APPLICATION_JSON));
HttpClientContext context = HttpClientContext.create();
response = httpclient.execute(httpPost, context);
getCookiesFromCookieStore(context.getCookieStore(), cookieMap);
HttpEntity entity = response.getEntity();
reStr = EntityUtils.toString(entity, charset);
} catch (IOException e) {
throw e;
} finally {
httpPost.releaseConnection();
}
return reStr;
}
/**
-
发送JSON格式body的SSL POST请求
*@paramurl 地址
*@paramjsonBody json body
*@return
*@throwsException
*/
publicStringexecutePostWithJsonAndSSL(String url, String jsonBody)throwsException{
String re = "";
HttpPost post = new HttpPost(url);
post.setHeader("Cookie", convertCookieMapToString(cookieMap));
post.setConfig(RequestConfig.custom().setSocketTimeout(timeout).setConnectTimeout(timeout).build());
CloseableHttpResponse response;
try {
CloseableHttpClient httpClientRe = createSSLInsecureClient();
HttpClientContext contextRe = HttpClientContext.create();
post.setEntity(new StringEntity(jsonBody, ContentType.APPLICATION_JSON));
response = httpClientRe.execute(post, contextRe);
HttpEntity entity = response.getEntity();
if (entity != null) {
re = EntityUtils.toString(entity, charset);
}
getCookiesFromCookieStore(contextRe.getCookieStore(), cookieMap);
} catch (Exception e) {
throw e;
}
return re;
}
privatevoidgetCookiesFromCookieStore(CookieStore cookieStore, Map<String, String> cookieMap){
List<Cookie> cookies = cookieStore.getCookies();
for (Cookie cookie : cookies) {
cookieMap.put(cookie.getName(), cookie.getValue());
}
}
privateStringconvertCookieMapToString(Map<String, String> map){
String cookie = "";
for (String key : map.keySet()) {
cookie += (key + "=" + map.get(key) + "; ");
}
if (map.size() > 0) {
cookie = cookie.substring(0, cookie.length() - 2);
}
return cookie;
}
/**
-
创建 SSL连接
*@return
*@throwsGeneralSecurityException
*/
privatestaticCloseableHttpClientcreateSSLInsecureClient()throwsGeneralSecurityException{
try {
SSLContext sslContext = new SSLContextBuilder().loadTrustMaterial(null, (chain, authType) -> true).build();
SSLConnectionSocketFactory sslConnectionSocketFactory = new SSLConnectionSocketFactory(sslContext,
(s, sslContextL) -> true);
return HttpClients.custom().setSSLSocketFactory(sslConnectionSocketFactory).build();
} catch (GeneralSecurityException e) {
throw e;
}
}
}
给大家推荐一个程序员学习交流群:863621962。群里有分享的视频,还有思维导图
群公告有视频,都是干货的,你可以下载来看。主要分享分布式架构、高可扩展、高性能、高并发、性能优化、Spring boot、Redis、ActiveMQ、Nginx、Mycat、Netty、Jvm大型分布式项目实战学习架构师视频。