Hive具有一個可選的組件叫做HiveServer或者HiveThrift,其允許通過指定端口訪問Hive。Thrift是一個高效的RPC服務框架,可以通過編程的方式遠程訪問Hive。
hive對外提供thrift和jdbc兩種接口方式。其中jdbc是基於thrift接口封裝,jdbc的每個方法都是同步操作,但是hiveserver2 thrift接口支持異步操作,jdbc同步只是因爲在實現上加了同步鎖。
hiveserver1和hiveserver2的主要區別如下:
version | jdbc driver class | jdbc connection url | 是否支持跨語言 | 是否支持併發寫 | 是否支持表鎖 | 默認thrift端口 |
hiveserver1 | org.apache.hadoop.hive.jdbc.HiveDriver | jdbc:hive://&lt;host&gt;:&lt;port&gt; | 是 | 否 | 否 | 10000 |
hiveserver2 | org.apache.hive.jdbc.HiveDriver | jdbc:hive2://&lt;host&gt;:&lt;port&gt; | 是 | 是 | 是(基於zk的表鎖) | 10000 |
下面就簡單的實現一個Java通過HiveServer2訪問Hive數據的示例:
pom.xml添加依賴:
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.41</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>1.2.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.2</version>
</dependency>
定義接口,HiveDao.java:
import com.alibaba.fastjson.JSONObject;
public interface HiveDao {
/**
 * Runs a query against Hive and packs the result into JSON.
 *
 * @param connection pooled connection obtained from the connection pool
 * @param sql        query to execute
 * @return JSON object holding the request SQL, column names ("heads"),
 *         column types ("types") and row data ("result")
 */
JSONObject querySql(HadoopConnection connection, String sql);
}
HadoopConnection.java:
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
/**
 * One slot of the hand-rolled Hive connection pool: a JDBC connection plus
 * the bookkeeping state (id, status, keep-alive timestamp, credentials)
 * needed to reopen it.
 */
public class HadoopConnection {
    /** Identifier of this slot inside the pool. */
    private int id;
    /** JDBC URL used to open (and reopen) the underlying connection. */
    private String url;
    /** Timestamp in ms of the last keep-alive ping. */
    private long keepTime;
    /**
     * 連接狀態 — connection status: 0 = idle, 1 = in use.
     */
    private int status;
    private String username;
    // Correctly spelled field; the original misspelled accessors
    // (getPawssord/setPawssord) are kept below, deprecated, so existing
    // callers continue to compile.
    private String password;
    /** The live JDBC connection. */
    private Connection connection;

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public long getKeepTime() {
        return keepTime;
    }

    public void setKeepTime(long keepTime) {
        this.keepTime = keepTime;
    }

    public int getStatus() {
        return status;
    }

    public void setStatus(int status) {
        this.status = status;
    }

    public String getUsername() {
        return username;
    }

    public void setUsername(String username) {
        this.username = username;
    }

    /** @deprecated misspelling kept for backward compatibility; use {@link #getPassword()}. */
    @Deprecated
    public String getPawssord() {
        return password;
    }

    /** @deprecated misspelling kept for backward compatibility; use {@link #setPassword(String)}. */
    @Deprecated
    public void setPawssord(String pawssord) {
        this.password = pawssord;
    }

    public String getPassword() {
        return password;
    }

    public void setPassword(String password) {
        this.password = password;
    }

    public Connection getConnection() {
        return connection;
    }

    public void setConnection(Connection connection) {
        this.connection = connection;
    }

    /**
     * Creates the slot and immediately opens the JDBC connection.
     *
     * @param id     pool slot id
     * @param url    JDBC URL
     * @param status initial status (0 = idle, 1 = in use)
     * @param user   user name
     * @param pwd    password
     * @throws SQLException if the connection cannot be established
     */
    public HadoopConnection(int id, String url, int status, String user, String pwd) throws SQLException {
        super();
        this.id = id;
        this.url = url;
        this.status = status;
        this.username = user;
        this.password = pwd;
        this.connection = DriverManager.getConnection(url, username, password);
    }

    @Override
    public String toString() {
        return "HadoopConnection [id=" + id + ", status=" + status + "]";
    }
}
HiveDaoImpl.java:
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.google.common.collect.Lists;
import org.springframework.stereotype.Component;

import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.List;
@Component
public class HiveDaoImpl implements HiveDao {

    /**
     * HiveServer2 JDBC URL. Session-level Hive/MapReduce settings are passed
     * after the database name; replace ip/port/database with real values.
     */
    public static String HIVE_URL = "jdbc:hive2://ip:port/database?mapreduce.job.queuename=xxx;mapred.input.dir.recursive=true;hive.mapred.supports.subdirectories=true;hive.support.quoted.identifiers=none;mapreduce.job.reduces=1";

    /** HiveServer2 JDBC driver class. */
    private static String DRIVER_NAME = "org.apache.hive.jdbc.HiveDriver";

    /** Placeholder credentials — replace with real values before use. */
    public static String USERNAME = "username";
    public static String PWD = "password";

    /** The hand-rolled connection pool; guarded by the synchronized statics below. */
    private static List<HadoopConnection> connections = Lists.newArrayList();

    /**
     * Initializes the pool: loads the driver class (which registers it with
     * DriverManager) and opens {@code numpool} connections.
     *
     * @param numpool number of connections to create
     */
    public static void init(Integer numpool) {
        try {
            // Loading the class registers the driver with DriverManager.
            Class.forName(DRIVER_NAME);
            createConnectionPool(numpool, HIVE_URL, USERNAME, PWD);
        } catch (Exception e) {
            System.out.println("hadoop connection pool 初始化失敗 : " + e);
        }
    }

    /**
     * Executes a statement that returns no result set (DDL/DML).
     * The Statement is closed and the connection is always returned to the
     * pool, even on failure — the original leaked both when execute() threw.
     *
     * @param connection pooled connection to use
     * @param sql        statement to execute
     * @throws SQLException if execution fails
     */
    public void excuteSql(HadoopConnection connection, String sql) throws SQLException {
        if (connection == null || sql == null) {
            return;
        }
        try (Statement statement = connection.getConnection().createStatement()) {
            statement.execute(sql);
        } finally {
            resetConnection(connection);
        }
    }

    /**
     * Runs a query and packs the column names, column types and row data into
     * a JSON object with keys "request", "heads", "types", "result" and, on
     * failure, "error".
     *
     * @param connection pooled connection to use
     * @param sql        query to execute
     * @return result JSON; empty object when connection or sql is null
     */
    public JSONObject querySql(HadoopConnection connection, String sql) {
        JSONObject result = new JSONObject();
        if (connection == null || sql == null) {
            return result;
        }
        result.put("request", sql);
        // try-with-resources closes the Statement/ResultSet the original leaked.
        try (Statement statement = connection.getConnection().createStatement();
             ResultSet resultSet = statement.executeQuery(sql)) {
            // Column metadata: names and type names, 1-based indexing per JDBC.
            ResultSetMetaData metaData = resultSet.getMetaData();
            int columnCount = metaData.getColumnCount();
            JSONArray heads = new JSONArray();
            JSONArray types = new JSONArray();
            for (int index = 1; index <= columnCount; index++) {
                heads.add(metaData.getColumnName(index));
                types.add(metaData.getColumnTypeName(index));
            }
            // Row data: every cell fetched as a string.
            JSONArray rowDatas = new JSONArray();
            while (resultSet.next()) {
                JSONArray rowData = new JSONArray();
                for (int index = 1; index <= columnCount; index++) {
                    rowData.add(resultSet.getString(index));
                }
                rowDatas.add(rowData);
            }
            result.put("heads", heads);
            result.put("types", types);
            result.put("result", rowDatas);
        } catch (Exception e) {
            result.put("error", e);
        } finally {
            // Return the connection to the pool on both success and failure.
            resetConnection(connection);
        }
        return result;
    }

    /**
     * Keeps idle connections alive: reopens closed ones, and every 10 seconds
     * sends a trivial query so the server does not drop the session.
     */
    public static void keepAliveCollection() {
        HadoopConnection keepTemp = null;
        try {
            for (HadoopConnection connection : connections) {
                keepTemp = connection;
                if (connection.getStatus() == 0) {
                    if (connection.getConnection().isClosed()) {
                        resetConnection(connection);
                    }
                    long nowTime = System.currentTimeMillis();
                    if (nowTime - connection.getKeepTime() > 10000) {
                        // Close the ping Statement (the original leaked one per ping).
                        try (Statement statement = connection.getConnection().createStatement()) {
                            statement.execute("select 0");
                        }
                        connection.setKeepTime(nowTime);
                    }
                }
            }
        } catch (Exception e) {
            // Best-effort: try to reset whichever connection failed.
            resetConnection(keepTemp);
        }
    }

    /**
     * Fetches an idle connection and marks it busy.
     *
     * @return an idle connection, or null when all are in use
     */
    public static synchronized HadoopConnection getConnection() {
        HadoopConnection connection = null;
        for (HadoopConnection hadoop : connections) {
            if (hadoop.getStatus() == 0) {
                hadoop.setStatus(1);
                connection = hadoop;
                break;
            }
        }
        return connection;
    }

    /** Closes every pooled connection and empties the pool. */
    public static synchronized void closeConnections() {
        if (getConnectionSize() == 0) {
            return;
        }
        try {
            for (HadoopConnection hadoop : connections) {
                if (hadoop.getConnection() != null) {
                    hadoop.getConnection().close();
                }
            }
        } catch (Exception e) {
            System.out.println("close hadoop connection fail, msg: " + e.getMessage());
        } finally {
            connections.clear();
        }
    }

    /**
     * Returns a connection to the pool: clears warnings, reopens it if the
     * server closed it, and marks it idle.
     *
     * @param connection the connection to release; ignored when null
     */
    public static synchronized void resetConnection(HadoopConnection connection) {
        if (connection == null) {
            // keepAliveCollection can call this with null before the first iteration.
            return;
        }
        try {
            connection.getConnection().clearWarnings();
            if (connection.getConnection().isClosed()) {
                System.out.println(connection + " , is reseted!");
                // Reconnect with the stored credentials; the original dropped
                // username/password and could not reopen an authenticated session.
                connection.setConnection(DriverManager.getConnection(
                        connection.getUrl(), connection.getUsername(), connection.getPawssord()));
            }
            connection.setStatus(0);
        } catch (Exception e) {
            System.out.println("reset connection exception : " + e);
        }
    }

    /**
     * Creates the pool; no-op when a pool already exists.
     *
     * @param num  number of connections to open
     * @param url  JDBC URL
     * @param user user name
     * @param pwd  password
     * @throws SQLException if any connection cannot be opened
     */
    public static synchronized void createConnectionPool(int num, String url, String user, String pwd) throws SQLException {
        if (!connections.isEmpty()) {
            return;
        }
        for (int i = 0; i < num; i++) {
            connections.add(new HadoopConnection(i, url, 0, user, pwd));
            System.out.println("hadoop connection pool success : url->["+url+"] user->["+user+"]" );
        }
    }

    /** @return current number of pooled connections */
    public static int getConnectionSize() {
        return connections.size();
    }
}
測試:
BaseTest.java:
import org.junit.runner.RunWith;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
/**
 * Base class for Spring-backed tests: boots the JUnit 4 Spring runner and
 * loads the application context from testContext.xml so subclasses can use
 * {@code @Resource} field injection.
 */
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = {"classpath:testContext.xml"})
public class BaseTest {
}
HiveTest.java:
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.didichuxing.risk.hive.HadoopConnection;
import com.didichuxing.risk.hive.HiveDao;
import com.didichuxing.risk.hive.HiveDaoImpl;
import org.junit.Test;
import javax.annotation.Resource;
/**
 * Smoke test for the HiveServer2 access path: initializes a 6-connection
 * pool, runs a query, prints column names, types and rows, then closes
 * the pool. Requires a reachable HiveServer2 and a Spring test context.
 */
public class HiveTest extends BaseTest {

    @Resource
    private HiveDao hiveDao;

    @Test
    public void testHiveClient() {
        try {
            HiveDaoImpl.init(6);
            HadoopConnection connection = HiveDaoImpl.getConnection();
            // Query to fetch data from the Hive table (replace with a real table).
            String sql = "select * from database.table";
            if (connection == null) {
                System.out.println("get hive data : " + "connection is null");
                return;
            }
            JSONObject jsonObject = hiveDao.querySql(connection, sql);
            if (null == jsonObject) {
                // The original printed this and then dereferenced jsonObject
                // anyway (NPE); bail out instead.
                System.out.println("json object is null.");
                return;
            }
            JSONArray rows = (JSONArray) jsonObject.get("result");
            JSONArray heads = (JSONArray) jsonObject.get("heads");
            JSONArray types = (JSONArray) jsonObject.get("types");
            // Print column names.
            for (int i = 0; i < heads.size(); i++) {
                System.out.println(heads.get(i));
            }
            // Print column types.
            for (int i = 0; i < types.size(); i++) {
                System.out.println(types.get(i));
            }
            if (rows == null || rows.size() == 0) {
                System.out.println("row is null.");
                return;
            }
            // Print every row (each row is a JSONArray of cell strings).
            for (int i = 0; i < rows.size(); i++) {
                System.out.println((JSONArray) rows.get(i));
            }
        } catch (Exception e) {
            System.out.println("get hive data exception : " + e.getMessage());
        } finally {
            HiveDaoImpl.closeConnections();
        }
    }
}
以上。
Author:憶之獨秀
Email:[email protected]
註明出處:https://blog.csdn.net/lavorange/article/details/80412288