Installing and Configuring Apache Hive with Kerberos, and Integrating Kettle with Kerberos-Authenticated Hive


1 Connection


The installation below is organized following this diagram.
Figure: Kerberos request flow


2 KDC Installation

System dependencies

yum install -y libss
# Install this if installing krb5-server reports: libverto-module-base is needed by krb5-server-1.15.1-46.el7.x86_64
yum install -y libverto-libevent
yum install -y logrotate

2.1 Install the Kerberos service

# 1 Check whether Kerberos is already installed
rpm -qa | grep -E "krb5|libkadm5"
# Be very careful when removing packages; to keep versions consistent, prefer an upgrade install.
#   Kerberos and SSH share some libraries, so removing them can break SSH because of missing library files.
#   In a Docker container you can copy the missing files from the host into the container, e.g.:
#   docker cp /usr/lib64/libss.so.2.0 dev_mysql_v1:/usr/lib64/libss.so.2.0
# rpm -e --nodeps xxx

# 2 Install packages Kerberos depends on
## Check whether the words package is present; install it if not.
## Some applications and databases use it for spell checking, and password checkers use it to reject weak passwords.
rpm -qa | grep words
## Install words
wget http://mirror.centos.org/centos/7/os/x86_64/Packages/words-3.0-22.el7.noarch.rpm
## After installation a word list is available at /usr/share/dict/words.
rpm -ivh words-3.0-22.el7.noarch.rpm



# 3 Download the required packages
# On CentOS 7.8.2003 the default version is krb5-libs-1.15.1-46
wget http://mirror.centos.org/centos/7/os/x86_64/Packages/krb5-libs-1.15.1-46.el7.x86_64.rpm
wget http://mirror.centos.org/centos/7/os/x86_64/Packages/krb5-server-1.15.1-46.el7.x86_64.rpm
wget http://mirror.centos.org/centos/7/os/x86_64/Packages/krb5-workstation-1.15.1-46.el7.x86_64.rpm
wget http://mirror.centos.org/centos/7/os/x86_64/Packages/libkadm5-1.15.1-46.el7.x86_64.rpm

# 4 Install
rpm -iUh krb5-libs-1.15.1-46.el7.x86_64.rpm
rpm -ivh libkadm5-1.15.1-46.el7.x86_64.rpm
rpm -ivh krb5-workstation-1.15.1-46.el7.x86_64.rpm
rpm -ivh krb5-server-1.15.1-46.el7.x86_64.rpm

2.2 Configure /var/kerberos/krb5kdc/kdc.conf

[kdcdefaults]
 kdc_ports = 88
 kdc_tcp_ports = 88

[realms]
  YORE.COM = {
   # JDK 8 (at least before 1.8.0_152) may not support aes256-cts. If you see java.security.InvalidKeyException: Illegal key size,
   # Option 1: remove aes256-cts and keep aes128-cts
   # Option 2: download the official jce_policy-8.zip, unpack it, and overwrite local_policy.jar and US_export_policy.jar under jre/lib/security in the JDK install directory
   #       The path may differ slightly between versions; use the files from the unlimited policy
   #       Download: http://www.oracle.com/technetwork/java/javase/downloads/jce8-download-2133166.html
   #                 https://www.oracle.com/java/technologies/javase-jce-all-downloads.html
   #master_key_type = aes256-cts
   acl_file = /var/kerberos/krb5kdc/kadm5.acl
   dict_file = /usr/share/dict/words
   admin_keytab = /var/kerberos/krb5kdc/kadm5.keytab
   max_renewable_life = 7d 0h 0m 0s
   #supported_enctypes = aes256-cts:normal aes128-cts:normal des3-hmac-sha1:normal arcfour-hmac:normal camellia256-cts:normal camellia128-cts:normal des-hmac-sha1:normal des-cbc-md5:normal des-cbc-crc:normal
   supported_enctypes = aes128-cts:normal des3-hmac-sha1:normal arcfour-hmac:normal camellia256-cts:normal camellia128-cts:normal des-hmac-sha1:normal des-cbc-md5:normal des-cbc-crc:normal
 }

2.3 Configure /var/kerberos/krb5kdc/kadm5.acl

Change the realm to the name configured above, YORE.COM. Any principal matching */[email protected] is then treated as an administrator, with permission *, i.e. all privileges.

*/[email protected] *

2.4 Configure /etc/krb5.conf

Here yore.bigdata03.com is the hostname of the KDC server; the kdc port defaults to 88 and the admin_server port defaults to 749 when omitted.

# Configuration snippets may be placed in this directory as well
includedir /etc/krb5.conf.d/

[logging]
 default = FILE:/var/log/krb5libs.log
 kdc = FILE:/var/log/krb5kdc.log
 admin_server = FILE:/var/log/kadmind.log

[libdefaults]
 default_realm = YORE.COM
 dns_lookup_realm = false
 dns_lookup_kdc = false
 ticket_lifetime = 24h
 renew_lifetime = 7d
 forwardable = true
 #rdns = false
 #pkinit_anchors = /etc/pki/tls/certs/ca-bundle.crt
# default_realm = EXAMPLE.COM
 #default_ccache_name = KEYRING:persistent:%{uid}
 udp_preference_limit = 1
 kdc_timeout = 3000
 
[realms]
 YORE.COM = {
  kdc = yore.bigdata03.com:88
  admin_server = yore.bigdata03.com:749
  default_domain = YORE.COM
 }

[domain_realm]
.yore.bigdata03.com = YORE.COM
yore.bigdata03.com = YORE.COM

2.5 Create the Kerberos database

# 1 Create/initialize the Kerberos database
# If you run into problems you may need to run: /usr/sbin/kdb5_util -r YORE.COM destroy -f
#            and delete /var/kerberos/krb5kdc/principal*
# 
# You will be prompted for a master password (kdc123 here).
/usr/sbin/kdb5_util create -s -r YORE.COM

# 2 Check the generated files
# The first two were written in the previous steps; the principal* files are generated by this command
[root@kdc download]# ll /var/kerberos/krb5kdc/
total 24
-rw-r--r-- 1 root root   19 Mar 25 21:41 kadm5.acl
-rw-r--r-- 1 root root  488 Mar 25 21:42 kdc.conf
-rw------- 1 root root 8192 Mar 25 21:40 principal
-rw------- 1 root root 8192 Mar 25 21:40 principal.kadm5
-rw------- 1 root root    0 Mar 25 21:40 principal.kadm5.lock
-rw------- 1 root root    0 Mar 25 21:40 principal.ok

2.6 Create the Kerberos administrator account

# You will be prompted for the administrator password (kdc123) and asked to confirm it; if no error is reported, the principal was created successfully.
[root@kdc download]# /usr/sbin/kadmin.local -q "addprinc admin/[email protected]"
Authenticating as principal root/[email protected] with password.
WARNING: no policy specified for admin/[email protected]; defaulting to no policy
Enter password for principal "admin/[email protected]":
Re-enter password for principal "admin/[email protected]":
Principal "admin/[email protected]" created.

2.7 Enable the Kerberos services at boot and start krb5kdc and kadmin

# Enable at boot
systemctl enable krb5kdc
systemctl enable kadmin

# Start
systemctl start krb5kdc
systemctl start kadmin

# Status
systemctl status krb5kdc
systemctl status kadmin

2.8 Verify authentication

# 1 When prompted, enter the admin password (the Kerberos administrator password: kdc123)
[root@kdc download]# kinit admin/[email protected]
Password for admin/[email protected]:

# 2 List all principals
/usr/sbin/kadmin.local -q "listprincs"

# 3 View the ticket cache
[root@kdc download]# klist
Ticket cache: FILE:/tmp/krb5cc_0
Default principal: admin/[email protected]
Valid starting       Expires              Service principal
2020-07-04T11:18:24  2020-07-05T11:18:24  krbtgt/[email protected]
        renew until 2020-07-11T11:18:24
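
During testing it can be handy to renew or clear the cached ticket; a minimal sketch using standard MIT Kerberos commands (the renew window is bounded by the renew_lifetime / max_renewable_life settings above):

# Renew the current ticket (only works within the renewable lifetime)
kinit -R
# Destroy the local ticket cache
kdestroy
# Confirm the cache is now empty
klist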

3 Application Server

Here the application server refers to a big data platform or cluster with Kerberos authentication enabled. This section sets up Apache Hadoop and Apache Hive with Kerberos authentication, so the application server means the node(s) hosting Hadoop and Hive.

3.1 Kerberos installation

# 1 Copy the packages
scp -P 30021 -r krb5-libs-1.15.1-46.el7.x86_64.rpm krb5-workstation-1.15.1-46.el7.x86_64.rpm \
libkadm5-1.15.1-46.el7.x86_64.rpm root@cdh3:/opt/download/

# 2 Install
rpm -iUh krb5-libs-1.15.1-46.el7.x86_64.rpm
rpm -ivh libkadm5-1.15.1-46.el7.x86_64.rpm
rpm -ivh krb5-workstation-1.15.1-46.el7.x86_64.rpm

# 3 Copy /etc/krb5.conf from the KDC to /etc/ on the application server node
scp /etc/krb5.conf root@bigdata01:/etc/

3.2 Create principals and a keytab for the application server node

Note: to keep things simple, all Hadoop services and the Hive services share a single principal and a single keytab file here. In a real production environment each service should have its own principal (a brief sketch follows below); the setup steps are essentially the same.
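
For reference only, a production layout usually creates one principal per service, bound to its host; a minimal hedged sketch (the service and host names below are illustrative and not part of this guide's single-principal setup):

# Per-service principals with random keys, each exported to its own keytab
/usr/sbin/kadmin.local -q "addprinc -randkey nn/[email protected]"
/usr/sbin/kadmin.local -q "addprinc -randkey dn/[email protected]"
/usr/sbin/kadmin.local -q "addprinc -randkey hive/[email protected]"
/usr/sbin/kadmin.local -q "xst -k ./nn.service.keytab nn/[email protected]"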

# 1 On the KDC node, create a hadoop principal (password: kdc123)
/usr/sbin/kadmin.local -q "addprinc hadoop/[email protected]"

# 2 Export the keytab file
/usr/sbin/kadmin.local -q "xst -k ./hadoop.keytab hadoop/[email protected]"

# 3 Copy hadoop.keytab to the Hadoop node (run this step after unpacking Hadoop in the next section)
scp -P 22 hadoop.keytab root@bigdata01:/opt/installed/hadoop-3.0.3/etc/hadoop/

# 4 Inspect the exported keytab file (mind the file permissions)
/usr/bin/klist -ket /opt/installed/hadoop-3.0.3/etc/hadoop/hadoop.keytab

# 5 Create a Hadoop application user, e.g. hdfs. All following steps are run as this user
useradd -s /bin/bash hdfs
# Set a password
passwd hdfs

# 6 Obtain and cache a ticket on each Hadoop node, as whichever user operates Hadoop
kinit -kt /opt/installed/hadoop-3.0.3/etc/hadoop/hadoop.keytab hadoop/[email protected]

# 7 View the ticket
klist 

3.3 Apache Hadoop installation and deployment

3.3.1 Unpack and configure

# 1 Download
wget http://archive.apache.org/dist/hadoop/common/hadoop-3.0.3/hadoop-3.0.3.tar.gz

# 2 Unpack
tar -zxf hadoop-3.0.3.tar.gz -C /opt/installed
# To save space, the bundled docs can be removed if you do not need them
rm -rf /opt/installed/hadoop-3.0.3/share/doc

# 3 SSH
ssh-keygen -t rsa
ssh-copy-id -i ~/.ssh/id_rsa.pub hdfs@bigdata01

# 4 Configure environment variables (can go in the user profile: vim ~/.bash_profile)
# set hadoop environment
export HADOOP_HOME=/opt/installed/hadoop-3.0.3
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

# 5 Create the required directories
# Create them as root, then change the ownership of /u01/hadoop to hdfs
mkdir -p /u01/hadoop/dfs/dn
mkdir -p /u01/hadoop/dfs/nn
mkdir -p /u01/hadoop/dfs/snn
#mkdir -p /u01/hadoop/yarn/container-logs
#mkdir -p /u01/hadoop/yarn/container-executor
#mkdir -p /u01/hadoop/app/tmp/nm-local-dir
chown -R hdfs:hdfs /u01/hadoop

# 6 Configure hadoop-env.sh
vim $HADOOP_HOME/etc/hadoop/hadoop-env.sh
### Add the following ###
export JAVA_HOME=/usr/local/jdk1.8.0_231
export HADOOP_HOME=/opt/installed/hadoop-3.0.3
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib:$HADOOP_COMMON_LIB_NATIVE_DIR"
## User settings; if starting as root, set these to root:
#export HDFS_NAMENODE_USER=root
#export HDFS_DATANODE_USER=root
#export HDFS_SECONDARYNAMENODE_USER=root
#export YARN_RESOURCEMANAGER_USER=root
#export YARN_NODEMANAGER_USER=root

3.3.2 Configure core-site.xml

vim $HADOOP_HOME/etc/hadoop/core-site.xml
<configuration> 
  <property> 
    <name>fs.defaultFS</name>  
    <value>hdfs://bigdata01:8020</value> 
  </property>  
  <!-- Trash retention time in minutes; 0 disables the trash -->  
  <property> 
    <name>fs.trash.interval</name>  
    <value>60</value> 
  </property>  
  <property> 
    <name>fs.trash.checkpoint.interval</name>  
    <value>0</value> 
  </property>  
  <property> 
    <name>hadoop.proxyuser.root.groups</name>  
    <value>*</value> 
  </property>  
  <property> 
    <name>hadoop.proxyuser.root.hosts</name>  
    <value>*</value> 
  </property>
  <property> 
    <name>hadoop.proxyuser.hdfs.groups</name>  
    <value>*</value> 
  </property>  
  <property> 
    <name>hadoop.proxyuser.hdfs.hosts</name>  
    <value>*</value> 
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/tmp/hadoop-${user.name}</value>
  </property>
  
  <!-- Enable Kerberos -->  
  <property> 
    <name>hadoop.security.authentication</name>  
    <value>kerberos</value> 
  </property>  
  <property> 
    <name>hadoop.security.authorization</name>  
    <value>true</value> 
  </property>  
  <property> 
    <name>hadoop.rpc.protection</name>  
    <value>authentication</value> 
  </property> 
  <!--<property> 
    <name>hadoop.security.auth_to_local</name>  
    <value>DEFAULT</value>
    <description>Maps kerberos principals to local user names</description>
  </property>-->
  <property> 
    <name>hadoop.security.auth_to_local</name>  
    <value>
      RULE:[2:$1/$2@$0](hadoop/.*@YORE.COM)s/.*/hdfs/ 
      RULE:[2:$1/$2@$0](hadoop/.*@YORE.COM)s/.*/yarn/ 
      RULE:[2:$1/$2@$0](hadoop/.*@YORE.COM)s/.*/mapred/ 
      DEFAULT
    </value> 
  </property>

</configuration>
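
To see how the auth_to_local rules above map a principal to a local user (rules are applied in order, so hadoop/*@YORE.COM resolves to hdfs here), Hadoop ships a small helper class; a hedged check once the configuration is in place:

# Prints the local short name the configured rules produce for the given principal
hadoop org.apache.hadoop.security.HadoopKerberosName hadoop/[email protected]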

3.3.3 Configure hdfs-site.xml

# 1 Note that on CentOS 7 non-root users cannot bind ports below 1024
## Option 1: change to ports above 1024
## Option 2: redirect the ports
yum install iptables-services
# View the existing iptables rules
iptables -L -n
# Either disable the firewall or open the ports
# Redirect port x to port xx
iptables -t nat -A PREROUTING -p tcp --dport x -j REDIRECT --to-port xx
# Save
iptables-save > /etc/sysconfig/iptables


vim $HADOOP_HOME/etc/hadoop/hdfs-site.xml
<configuration> 
  <property> 
    <name>dfs.namenode.name.dir</name>  
    <value>file:///u01/hadoop/dfs/nn</value> 
  </property>  
  <property> 
    <name>dfs.datanode.data.dir</name>  
    <value>file:///u01/hadoop/dfs/dn</value> 
  </property>  
  <property> 
    <name>dfs.namenode.checkpoint.dir</name>  
    <value>file:///u01/hadoop/dfs/snn</value> 
  </property>  
  <!-- Number of block replicas, default is 3 -->  
  <property> 
    <name>dfs.replication</name>  
    <value>1</value> 
  </property>  
  <property> 
    <name>dfs.permissions</name>  
    <value>false</value> 
  </property> 
  
  <!-- Kerberos -->  
  <!-- 1 SSL -->  
  <property> 
    <name>dfs.http.policy</name>  
    <value>HTTPS_ONLY</value> 
  </property>  
  <property> 
    <name>dfs.namenode.https-address</name>  
    <value>bigdata01:9871</value> 
  </property>  
  <!--<property> 
    <name>dfs.https.enable</name>  
    <value>true</value> 
  </property>-->
  <!-- 2 NameNode -->  
  <property> 
    <name>dfs.namenode.http-address</name>  
    <value>bigdata01:50070</value> 
  </property>  
  <property> 
    <name>dfs.block.access.token.enable</name>  
    <value>true</value> 
  </property>  
  <property> 
    <name>dfs.namenode.kerberos.principal</name>  
    <value>hadoop/[email protected]</value> 
  </property>  
  <property> 
    <name>dfs.namenode.keytab.file</name>  
    <value>/opt/installed/hadoop-3.0.3/etc/hadoop/hadoop.keytab</value> 
  </property>  
  <property> 
    <name>dfs.namenode.kerberos.internal.spnego.principal</name>  
    <value>hadoop/[email protected]</value> 
  </property>  
  <!-- 3 Secondary NameNode -->  
  <property> 
    <name>dfs.namenode.secondary.http-address</name>  
    <value>bigdata01:9868</value> 
  </property>  
  <property> 
    <name>dfs.secondary.https.address</name>  
    <value>bigdata01:9869</value> 
  </property>  
  <property> 
    <name>dfs.secondary.namenode.keytab.file</name>  
    <value>/opt/installed/hadoop-3.0.3/etc/hadoop/hadoop.keytab</value> 
  </property>  
  <property> 
    <name>dfs.secondary.namenode.kerberos.principal</name>  
    <value>hadoop/[email protected]</value> 
  </property>  
  <property> 
    <name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name>  
    <value>hadoop/[email protected]</value> 
  </property>  
  <!-- 4 JournalNode -->  
  <!--<property>
      <name>dfs.journalnode.kerberos.principal</name>
      <value>hadoop/[email protected]</value>
   </property>
  <property>
    <name>dfs.journalnode.keytab.file</name>
    <value>/opt/installed/hadoop-3.0.3/etc/hadoop/hadoop.keytab</value>
  </property>
  <property>
    <name>dfs.journalnode.kerberos.internal.spnego.principal</name>
    <value>hadoop/[email protected]</value>
  </property>
  <property>
    <name>dfs.journalnode.https-address</name>
    <value>bigdata01:8481</value>
  </property>-->  
  <!-- 5 DataNode -->  
  <property> 
    <name>dfs.datanode.data.dir.perm</name>  
    <value>700</value> 
  </property>  
  <property> 
    <name>dfs.datanode.address</name>  
    <!--<value>0.0.0.0:1004</value>-->
    <value>0.0.0.0:1104</value> 
  </property>  
  <property> 
    <name>dfs.datanode.http.address</name>  
    <!--<value>0.0.0.0:1006</value>-->
    <value>0.0.0.0:1106</value> 
  </property>
  <!-- SASL support for data transfer -->
  <property> 
    <name>dfs.data.transfer.protection</name>  
    <value>integrity</value> 
  </property>  
  <!--<property> 
    <name>dfs.datanode.https.address</name>  
    <value>0.0.0.0:9865</value> 
  </property>-->
  <property> 
    <name>dfs.datanode.kerberos.principal</name>  
    <value>hadoop/[email protected]</value> 
  </property>  
  <property> 
    <name>dfs.datanode.keytab.file</name>  
    <value>/opt/installed/hadoop-3.0.3/etc/hadoop/hadoop.keytab</value> 
  </property>  
  <property> 
    <name>dfs.encrypt.data.transfer</name>  
    <value>false</value> 
  </property>  
  <!-- 6 WebHDFS -->  
  <property> 
    <name>dfs.web.authentication.kerberos.principal</name>  
    <value>hadoop/[email protected]</value> 
  </property>  
  <property> 
    <name>dfs.web.authentication.kerberos.keytab</name>  
    <value>/opt/installed/hadoop-3.0.3/etc/hadoop/hadoop.keytab</value> 
  </property> 
</configuration>

3.3.4 Configure SSL

(1) Generate the required certificates

mkdir /etc/https
cd /etc/https
# Generate the CA. You will be prompted for a password; enter one of at least 6 characters (bigdata)
openssl req -new -x509 -keyout hdfs_ca_key -out hdfs_ca_cert -days 3650 -subj '/C=CN/ST=beijing/L=chaoyang/O=yore/OU=dt/CN=yore.com'

# On every machine, generate the keystore and truststore
## Generate the keystore
keytool -keystore keystore -alias localhost -validity 3650 -genkey -keyalg RSA -keysize 2048 -dname "CN=${fqdn}, OU=DT, O=DT, L=CY, ST=BJ, C=CN"
## Add the CA to the truststore. Answer y when asked whether to trust the certificate
keytool -keystore truststore -alias CARoot -import -file hdfs_ca_cert
## Export the cert from the keystore
keytool -certreq -alias localhost -keystore keystore -file cert
## Sign the cert with the CA
openssl x509 -req -CA hdfs_ca_cert -CAkey hdfs_ca_key -in cert -out cert_signed -days 3650 -CAcreateserial
## Import the CA cert and the CA-signed cert into the keystore
keytool -keystore keystore -alias CARoot -import -file hdfs_ca_cert
keytool -keystore keystore -alias localhost -import -file cert_signed
## Make copies with the .jks suffix
cp keystore /etc/https/keystore.jks
cp truststore /etc/https/truststore.jks
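
To confirm the keystore now contains both the CA and the signed host certificate, a minimal hedged check (you will be prompted for the keystore password, bigdata above):

# Expect two entries: caroot (trustedCertEntry) and localhost (PrivateKeyEntry)
keytool -list -keystore /etc/https/keystore.jks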

(2) Configure ssl-client.xml

cp $HADOOP_HOME/etc/hadoop/ssl-client.xml.example $HADOOP_HOME/etc/hadoop/ssl-client.xml
vim $HADOOP_HOME/etc/hadoop/ssl-client.xml
<configuration> 
  <property> 
    <name>ssl.client.truststore.location</name>  
    <value>/etc/https/truststore.jks</value>  
    <description>Truststore to be used by clients like distcp. Must be specified.</description> 
  </property>  
  <property> 
    <name>ssl.client.truststore.password</name>  
    <value>bigdata</value>  
    <description>Optional. Default value is "".</description> 
  </property>  
  <property> 
    <name>ssl.client.truststore.type</name>  
    <value>jks</value>  
    <description>Optional. The keystore file format, default value is "jks".</description> 
  </property>  
  <property> 
    <name>ssl.client.truststore.reload.interval</name>  
    <value>10000</value>  
    <description>Truststore reload check interval, in milliseconds.Default value is 10000 (10 seconds).</description> 
  </property>  
  <property> 
    <name>ssl.client.keystore.location</name>  
    <value>/etc/https/keystore.jks</value>  
    <description>Keystore to be used by clients like distcp. Must be specified.</description> 
  </property>  
  <property> 
    <name>ssl.client.keystore.password</name>  
    <value>bigdata</value>  
    <description>Optional. Default value is "".</description> 
  </property>  
  <property> 
    <name>ssl.client.keystore.keypassword</name>  
    <value>bigdata</value>  
    <description>Optional. Default value is "".</description> 
  </property>  
  <property> 
    <name>ssl.client.keystore.type</name>  
    <value>jks</value>  
    <description>Optional. The keystore file format, default value is "jks".</description> 
  </property> 
</configuration>

(3) Configure ssl-server.xml

cp $HADOOP_HOME/etc/hadoop/ssl-server.xml.example $HADOOP_HOME/etc/hadoop/ssl-server.xml
vim $HADOOP_HOME/etc/hadoop/ssl-server.xml
<configuration> 
  <property> 
    <name>ssl.server.truststore.location</name>  
    <value>/etc/https/truststore.jks</value>  
    <description>Truststore to be used by NN and DN. Must be specified.</description> 
  </property>  
  <property> 
    <name>ssl.server.truststore.password</name>  
    <value>bigdata</value>  
    <description>Optional. Default value is "".</description> 
  </property>  
  <property> 
    <name>ssl.server.truststore.type</name>  
    <value>jks</value>  
    <description>Optional. The keystore file format, default value is "jks".</description> 
  </property>  
  <property> 
    <name>ssl.server.truststore.reload.interval</name>  
    <value>10000</value>  
    <description>Truststore reload check interval, in milliseconds.Default value is 10000 (10 seconds).</description> 
  </property>  
  <property> 
    <name>ssl.server.keystore.location</name>  
    <value>/etc/https/keystore.jks</value>  
    <description>Keystore to be used by NN and DN. Must be specified.</description> 
  </property>  
  <property> 
    <name>ssl.server.keystore.password</name>  
    <value>bigdata</value>  
    <description>Optional. Default value is "".</description> 
  </property>  
  <property> 
    <name>ssl.server.keystore.keypassword</name>  
    <value>bigdata</value>  
    <description>Optional. Default value is "".</description> 
  </property>  
  <property> 
    <name>ssl.server.keystore.type</name>  
    <value>jks</value>  
    <description>Optional. The keystore file format, default value is "jks".</description> 
  </property> 
</configuration>

3.3.5 Configure mapred-site.xml

vim $HADOOP_HOME/etc/hadoop/mapred-site.xml
<configuration> 
  <property> 
    <name>mapreduce.framework.name</name>  
    <value>yarn</value>     
  </property>  
  <property> 
    <name>mapreduce.jobhistory.webapp.address</name>  
    <value>bigdata01:19888</value> 
  </property>  
  <property> 
    <name>mapreduce.jobhistory.webapp.https.address</name>  
    <value>bigdata01:19890</value> 
  </property>  
  <!-- Kerberos -->  
  <!-- MapReduce JobHistory Server -->  
  <property> 
    <name>mapreduce.jobhistory.address</name>  
    <value>bigdata01:10020</value> 
  </property>  
  <property> 
    <name>mapreduce.jobhistory.keytab</name>  
    <value>/opt/installed/hadoop-3.0.3/etc/hadoop/hadoop.keytab</value> 
  </property>  
  <property> 
    <name>mapreduce.jobhistory.principal</name>  
    <value>hadoop/[email protected]</value> 
  </property> 
</configuration>

3.3.6 Configure yarn-site.xml

vim $HADOOP_HOME/etc/hadoop/yarn-site.xml
<configuration> 
  <!-- Site specific YARN configuration properties -->
  
  <property>        
    <name>yarn.resourcemanager.hostname</name>         
    <value>bigdata01</value>     
  </property>  
  <property>        
    <name>yarn.nodemanager.aux-services</name>         
    <value>mapreduce_shuffle</value>     
  </property>  
  <property> 
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>  
    <value>org.apache.hadoop.mapred.ShuffleHandler</value> 
  </property>  
  <!--<property> 
    <name>yarn.nodemanager.local-dirs</name>  
    <value>file:///u01/hadoop/yarn</value> 
  </property> --> 
  <property> 
    <name>yarn.nodemanager.local-dirs</name>  
    <value>${hadoop.tmp.dir}/nm-local-dir</value> 
  </property>  
  <property> 
    <name>yarn.log-aggregation-enable</name>  
    <value>true</value> 
  </property>  
  <property> 
    <name>yarn.log.server.url</name>  
    <value>http://bigdata01:19888/jobhistory/logs/</value> 
  </property>  
  <property> 
    <name>yarn.nodemanager.vmem-check-enabled</name>  
    <value>false</value> 
  </property>  
  <property> 
    <name>yarn.application.classpath</name>  
    <value>
      $HADOOP_HOME/etc/hadoop, 
      $HADOOP_HOME/share/hadoop/common/*, 
      $HADOOP_HOME/share/hadoop/common/lib/*, 
      $HADOOP_HOME/share/hadoop/hdfs/*, 
      $HADOOP_HOME/share/hadoop/hdfs/lib/*, 
      $HADOOP_HOME/share/hadoop/mapreduce/*, 
      $HADOOP_HOME/share/hadoop/mapreduce/lib/*, 
      $HADOOP_HOME/share/hadoop/yarn/*, 
      $HADOOP_HOME/share/hadoop/yarn/lib/*
    </value> 
  </property> 
  
  <!-- Kerberos -->  
  <!-- 1 ResourceManager -->  
  <property> 
    <name>yarn.resourcemanager.principal</name>  
    <value>hadoop/[email protected]</value> 
  </property>  
  <property> 
    <name>yarn.resourcemanager.keytab</name>  
    <value>/opt/installed/hadoop-3.0.3/etc/hadoop/hadoop.keytab</value> 
  </property>  
  <property> 
    <name>yarn.resourcemanager.webapp.https.address</name>  
    <value>${yarn.resourcemanager.hostname}:8090</value> 
  </property>  
  <!-- 2 NodeManager -->  
  <property> 
    <name>yarn.nodemanager.principal</name>  
    <value>hadoop/[email protected]</value> 
  </property>  
  <property> 
    <name>yarn.nodemanager.keytab</name>  
    <value>/opt/installed/hadoop-3.0.3/etc/hadoop/hadoop.keytab</value> 
  </property>  
  <!--<property> 
    <name>yarn.nodemanager.container-executor.class</name>  
    <value>org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor</value> 
  </property>  
  <property> 
    <name>yarn.nodemanager.linux-container-executor.group</name>  
    <value>hadoop</value> 
  </property>  
  <property>
    <name>yarn.nodemanager.linux-container-executor.path</name>
    <value>/u01/hadoop/yarn/container-executor</value>
  </property>-->  
  <property> 
    <name>yarn.nodemanager.webapp.https.address</name>  
    <value>0.0.0.0:8044</value> 
  </property>  
  <!-- 3 Configuration for WebAppProxy -->  
  <!--<property> 
    <name>yarn.web-proxy.address</name>  
    <value>0.0.0.0:8044</value>
    <description>if this is the same as yarn.resourcemanager.webapp.address or it is not defined then the ResourceManager will run the proxy otherwise a standalone proxy server will need to be launched. </description>
  </property>  
  <property> 
    <name>yarn.web-proxy.keytab</name>  
    <value>/opt/installed/hadoop-3.0.3/etc/hadoop/hadoop.keytab</value> 
  </property>  
  <property> 
    <name>yarn.web-proxy.principal</name>  
    <value>hadoop/[email protected]</value> 
  </property> -->
</configuration>

3.3.7 Initialize Hadoop; start and stop

# Before starting, update the worker list: add each cluster node's hostname to the file below (here only bigdata01)
vim $HADOOP_HOME/etc/hadoop/workers

# 1 Format the NameNode
$HADOOP_HOME/bin/hdfs namenode -format

# 2 Start Hadoop
# $HADOOP_HOME/sbin/start-all.sh 
$HADOOP_HOME/sbin/start-dfs.sh 
$HADOOP_HOME/sbin/start-yarn.sh

# 3 Stop Hadoop
$HADOOP_HOME/sbin/stop-all.sh 

3.3.8 Tests and common commands

# 1 Test that the cluster works.
hadoop fs -mkdir -p /tmp/input
hadoop fs -put $HADOOP_HOME/README.txt /tmp/input
export hadoop_version=`hadoop version | head -n 1 | awk '{print $2}'`
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-$hadoop_version.jar wordcount /tmp/input /tmp/output
hadoop fs -tail /tmp/output/part-r-00000

# 2 Show the configured HDFS name URI
hdfs getconf -confKey fs.default.name

# 3 HDFS summary report
hdfs dfsadmin -report
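
A quick way to confirm that Kerberos is actually enforced is to clear the ticket cache and retry; a minimal sketch using the keytab path from section 3.2:

# Without a ticket, HDFS access should fail with a GSS initiate / no valid credentials error
kdestroy
hadoop fs -ls /
# Re-authenticate and retry; this should now succeed
kinit -kt /opt/installed/hadoop-3.0.3/etc/hadoop/hadoop.keytab hadoop/[email protected]
hadoop fs -ls /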

3.4 Apache Hive installation and deployment

3.4.1 Before configuring

# 1 Download
#  Note: since Hadoop 3.0.x is installed here, make sure to download a Hive release that supports Hadoop 3.0;
#  Hive 2.1.1, for example, does not support Hadoop 3.0.x
#  wget http://archive.apache.org/dist/hive/hive-2.1.1/apache-hive-2.1.1-bin.tar.gz
wget http://archive.apache.org/dist/hive/hive-3.1.2/apache-hive-3.1.2-bin.tar.gz
tar -zxf apache-hive-3.1.2-bin.tar.gz -C /opt/installed/
cd /opt/installed/apache-hive-3.1.2-bin

# 2 Configure environment variables
# set hive environment
export HIVE_HOME=/opt/installed/apache-hive-3.1.2-bin
export PATH=$PATH:$HIVE_HOME/bin

# 3 Create the metastore database, using MySQL as an example
mysql> create database metastore1;

# 4 Add the MySQL driver to $HIVE_HOME/lib
wget https://repo1.maven.org/maven2/mysql/mysql-connector-java/5.1.47/mysql-connector-java-5.1.47.jar -P $HIVE_HOME/lib/

# 5 Copy the Hive configuration files from the templates
cd $HIVE_HOME/conf
cp hive-env.sh.template hive-env.sh
cp hive-default.xml.template hive-site.xml
cp hive-log4j2.properties.template hive-log4j2.properties
cp hive-exec-log4j2.properties.template hive-exec-log4j2.properties
cp beeline-log4j2.properties.template beeline-log4j2.properties

# 6 Add the Hive principal and keytab file.
# Since this is a test environment, we keep using the hadoop principal and keytab to simplify things.
# The node already holds a valid ticket, so the kinit step is skipped here.

# 7 Create the warehouse directory in HDFS
hadoop fs -mkdir -p /user/hive/warehouse

3.4.2 Edit hive-env.sh

export JAVA_HOME=/usr/local/jdk1.8.0_231
export HADOOP_HOME=/opt/installed/hadoop-3.0.3
export HIVE_HOME=/opt/installed/apache-hive-3.1.2-bin
export HIVE_CONF_DIR=$HIVE_HOME/conf

3.4.3 Edit hive-site.xml

Mainly change the following properties; the rest can keep their defaults. Also watch out for the invalid character in the template file (it is best to remove the &#8; around line 3215), which otherwise breaks parsing.

<configuration> 
  <property> 
    <name>hive.metastore.warehouse.dir</name>  
    <value>/user/hive/warehouse</value>  
    <description>HDFS path where Hive stores its data; it can be changed to another path. If no location is specified when creating a table, the table data is stored here by default</description> 
  </property>  
  <!-- Host that the HiveServer2 Thrift service binds to -->  
  <property> 
    <name>hive.server2.thrift.bind.host</name>  
    <value>bigdata01</value>  
    <description>Bind host on which to run the HiveServer2 Thrift service.</description> 
  </property>  
  <property> 
    <name>hive.metastore.uris</name>  
    <!-- Important: Presto and CarbonData later need this value to talk to the Hive metastore -->  
    <value>thrift://bigdata01:9083</value>  
    <description>Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore.</description> 
  </property>  
  <!-- JDBC connection used to store the metastore data -->  
  <property> 
    <name>javax.jdo.option.ConnectionURL</name>  
    <value>jdbc:mysql://bigdata01:3306/metastore1?createDatabaseIfNotExist=true&amp;useUnicode=true&amp;characterEncoding=utf8&amp;useSSL=false</value>  
    <description>JDBC connect string for a JDBC metastore. To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL. For example, jdbc:postgresql://myhost/db?ssl=true for postgres database.</description> 
  </property>  
  <!-- MySQL JDBC driver class -->  
  <property> 
    <name>javax.jdo.option.ConnectionDriverName</name>  
    <value>com.mysql.jdbc.Driver</value>  
    <description>Driver class name for a JDBC metastore</description> 
  </property>  
  <!-- MySQL username -->  
  <property> 
    <name>javax.jdo.option.ConnectionUserName</name>  
    <value>root</value>  
    <description>Username to use against metastore database</description> 
  </property>  
  <!-- MySQL password -->  
  <property> 
    <name>javax.jdo.option.ConnectionPassword</name>  
    <value>123456</value>  
    <description>password to use against metastore database</description> 
  </property>  
  <!-- Automatically create the schema on startup -->  
  <property> 
    <name>datanucleus.schema.autoCreateAll</name>  
    <value>true</value>  
    <description>Recommended off in production; it can be enabled for test environments or initial setup, then set to false once things are stable</description> 
  </property>  
  <property> 
    <name>hive.metastore.schema.verification</name>  
    <value>false</value>  
    <description>
      Enforce metastore schema version consistency.
      True: Verify that version information stored in is compatible with one from Hive jars.  Also disable automatic
            schema migration attempt. Users are required to manually migrate schema after Hive upgrade which ensures
            proper metastore schema migration. (Default)
      False: Warn if the version information stored in metastore doesn't match with one from in Hive jars.
    </description> 
  </property>  
  <!-- Local scratch space for Hive jobs -->  
  <property> 
    <name>hive.exec.local.scratchdir</name>  
    <value>/tmp/hive/exec/${user.name}</value>  
    <description>Local scratch space for Hive jobs</description> 
  </property>  
  <property> 
    <name>hive.downloaded.resources.dir</name>  
    <value>/tmp/hive/${hive.session.id}_resources</value>  
    <description>Temporary local directory for added resources in the remote file system.</description> 
  </property>  
  <property> 
    <name>hive.querylog.location</name>  
    <value>/tmp/hive/${user.name}</value>  
    <description>Location of Hive run time structured log file</description> 
  </property>  
  <property> 
    <name>hive.server2.logging.operation.log.location</name>   
    <value>/tmp/hive/server2/${user.name}/operation_logs</value>  
    <description>Top level directory where operation logs are stored if logging functionality is enabled</description> 
  </property>  
  <!-- Dynamic partition execution mode: nonstrict or strict -->  
  <property> 
    <name>hive.exec.dynamic.partition.mode</name>  
    <value>nonstrict</value>  
    <description>In strict mode, the user must specify at least one static partition in case the user accidentally overwrites all partitions. In nonstrict mode all partitions are allowed to be dynamic.</description> 
  </property> 
  
  <!-- Kerberos authentication -->
  <property> 
    <name>hive.server2.authentication</name>  
    <value>KERBEROS</value>  
    <description>
      Expects one of [nosasl, none, ldap, kerberos, pam, custom].
      Client authentication types.
        NONE: no authentication check
        LDAP: LDAP/AD based authentication
        KERBEROS: Kerberos/GSSAPI authentication
        CUSTOM: Custom authentication provider
                (Use with property hive.server2.custom.authentication.class)
        PAM: Pluggable authentication module
        NOSASL:  Raw transport
    </description> 
  </property>
  <property>
    <name>hive.server2.authentication.kerberos.principal</name>
    <value>hadoop/[email protected]</value>
    <description>Kerberos server principal</description>
  </property>
  <property>
    <name>hive.server2.authentication.kerberos.keytab</name>
    <value>/opt/installed/hadoop-3.0.3/etc/hadoop/hadoop.keytab</value>
    <description>Kerberos keytab file for server principal</description>
  </property>
  <property>
    <name>hive.metastore.sasl.enabled</name>
    <value>true</value>
    <description>If true, the metastore Thrift interface will be secured with SASL. Clients must authenticate with Kerberos.</description>
  </property>
  <property>
    <name>hive.metastore.kerberos.keytab.file</name>
    <value>/opt/installed/hadoop-3.0.3/etc/hadoop/hadoop.keytab</value>
    <description>The path to the Kerberos Keytab file containing the metastore Thrift server's service principal.</description>
  </property>
  <property>
    <name>hive.metastore.kerberos.principal</name>
    <value>hadoop/[email protected]</value>
    <description>
      The service principal for the metastore Thrift server. 
      The special string _HOST will be replaced automatically with the correct host name.
    </description>
  </property>
  
</configuration>

3.4.4 Initialize and start Hive

# 1 Initialize the Hive metastore schema (run as the hdfs user).
#  On success, the metadata tables are created in the MySQL database metastore1
$HIVE_HOME/bin/schematool -dbType mysql -initSchema

# 2 Start the services
hive --service metastore >/dev/null 2>&1 &
hive --service hiveserver2 >/dev/null 2>&1 &

# 3 Connect to Hive with beeline
beeline --color=true -d "org.apache.hive.jdbc.HiveDriver" \
-u "jdbc:hive2://bigdata01:10000/default;principal=hadoop/[email protected]"
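
As a quick sanity check, the same JDBC URL can also be used for a one-off, non-interactive query (a minimal sketch, assuming a valid Kerberos ticket is already in the local cache):

beeline -u "jdbc:hive2://bigdata01:10000/default;principal=hadoop/[email protected]" \
-e "show databases;"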

3.4.5 Garbled Chinese comments

If Chinese comments appear garbled, run the following in MySQL:

use metastore1;
select * from COLUMNS_V2;
-- Check the COLUMNS_V2 table DDL; it shows ENGINE=InnoDB DEFAULT CHARSET=latin1, i.e. the latin1 character set. Note: the table name may vary between versions; look for tables whose names start with COLUMNS first
show create table COLUMNS_V2;
alter table COLUMNS_V2 modify column COMMENT varchar(256) character set utf8;
alter table TABLE_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
alter table PARTITION_KEYS modify column PKEY_COMMENT varchar(4000) character set utf8;
alter table DBS modify column `DESC` varchar(4000) character set utf8;

3.4.6 A small example

Log in to Hive with Beeline (for more detail on Beeline see my other blog post on advanced Beeline usage), create a table, insert data, and run some simple SQL to confirm that Hive works.

-- 1 Create a table
0: jdbc:hive2://bigdata02:10000/default> create table if not exists person (id int, name varchar(32) comment '姓名', age int);

-- 2 Show the CREATE TABLE statement; note the Chinese comment is not garbled
0: jdbc:hive2://bigdata02:10000/default> show create table person;
+----------------------------------------------------+
|                   createtab_stmt                   |
+----------------------------------------------------+
| CREATE TABLE `person`(                             |
|   `id` int,                                        |
|   `name` varchar(32) COMMENT '姓名',                 |
|   `age` int)                                       |
| ROW FORMAT SERDE                                   |
|   'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'  |
| STORED AS INPUTFORMAT                              |
|   'org.apache.hadoop.mapred.TextInputFormat'       |
| OUTPUTFORMAT                                       |
|   'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' |
| LOCATION                                           |
|   'hdfs://bigdata02:8020/user/hive/warehouse/person' |
| TBLPROPERTIES (                                    |
|   'bucketing_version'='2',                         |
|   'transient_lastDdlTime'='1593865132')            |
+----------------------------------------------------+
15 rows selected (0.145 seconds)

-- 3 Insert data
0: jdbc:hive2://bigdata02:10000/default> insert into person values(100, "小蘭", 18),(102, "yore", 20);

-- 4 Query the table
0: jdbc:hive2://bigdata02:10000/default> SELECT * FROM person;
+------------+--------------+-------------+
| person.id  | person.name  | person.age  |
+------------+--------------+-------------+
| 100        | 小蘭           | 18          |
| 102        | yore         | 20          |
+------------+--------------+-------------+
2 rows selected (0.236 seconds)


4 Client

4.1 Kerberos on Windows

Note: Windows and Linux use different file path separators; in the configuration files here, always use / as the path separator.

# 1 Download. Visit http://web.mit.edu/kerberos/dist/
wget http://web.mit.edu/kerberos/dist/kfw/4.1/kfw-4.1-amd64.msi

# 2 Double-click the downloaded kfw-4.1-amd64.msi to install

# 3 After installation you will be prompted to reboot; reboot the system
# The tools are installed under C:\Program Files\MIT\Kerberos\bin

# 4 krb5.ini
#  Download /etc/krb5.conf from the KDC or the application server node to Windows
#  Rename krb5.conf to krb5.ini and place it under C:\ProgramData\MIT\Kerberos5
#  Note: the krb5.ini path is not the install path; do not mix them up
#  Add the following user environment variables on Windows.
#  Note: C:\temp must exist; the krb5cache file underneath it is created automatically after successful authentication
Variable name:  KRB5_CONFIG
Variable value: C:\ProgramData\MIT\Kerberos5\krb5.ini
Variable name:  KRB5CCNAME
Variable value: C:\temp\krb5cache

# 5 Configure hosts
#  Add the IP entries from the server's /etc/hosts to C:\Windows\System32\drivers\etc\hosts,
#  mainly the application server IPs and the KDC IP

# 6 PATH
#  The JDK also ships klist and kinit; if the JDK is already on the PATH,
#  make sure C:\Program Files\MIT\Kerberos\bin is added ahead of it
#  case 1: if the JDK klist wins, running klist prints something like: Current LogonId is 0:0x80ea4 ...
#  case 2: if the MIT Kerberos klist wins, running klist prints: klist: No credentials cache file found 

# 7 Copy the generated testuser.keytab to the client machine,
#   e.g. to C:\ProgramData\MIT\Kerberos5\testuser.keytab


# 8 Inspect the keytab file
#  Run from C:\Program Files\MIT\Kerberos; with the PATH configured it can be run from anywhere
.\bin\klist.exe -ket  C:\ProgramData\MIT\Kerberos5\testuser.keytab
 
# 9 Obtain a ticket
 .\bin\kinit.exe -kt C:\ProgramData\MIT\Kerberos5\testuser.keytab  testuser/[email protected]
 
# 10 View the locally cached ticket
 klist
#   You can also use the GUI client C:\Program Files\MIT\Kerberos\bin\MIT Kerberos.exe

4.2 Kerberos on Linux

Installation on Linux is similar to the application server node installation.

# 1 Check whether the Kerberos client is already installed
rpm -qa | grep -E "krb5|libkadm5"

# 2 Download the required Kerberos client packages
wget http://mirror.centos.org/centos/7/os/x86_64/Packages/krb5-libs-1.15.1-46.el7.x86_64.rpm
wget http://mirror.centos.org/centos/7/os/x86_64/Packages/krb5-workstation-1.15.1-46.el7.x86_64.rpm
wget http://mirror.centos.org/centos/7/os/x86_64/Packages/libkadm5-1.15.1-46.el7.x86_64.rpm

# 3 Install
rpm -iUh krb5-libs-1.15.1-46.el7.x86_64.rpm
rpm -ivh libkadm5-1.15.1-46.el7.x86_64.rpm
rpm -ivh krb5-workstation-1.15.1-46.el7.x86_64.rpm

# 4 Get /etc/krb5.conf 
#   Copy /etc/krb5.conf from the KDC or the application server to /etc on the client machine

# 5 Configure hosts
##  Map the cluster node IPs to hostnames as described in the environment section; mind the difference between internal and external networks
##  On Linux: /etc/hosts

# 6 Inspect the keytab file (mind the file permissions)
/usr/bin/klist -ket ~/testuser.keytab

# 7 Obtain a ticket
kinit -kt ~/testuser.keytab testuser/[email protected]

# 8 View the cached ticket
klist 
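
To verify that the client can actually obtain a service ticket for HiveServer2 from the KDC (before involving any JDBC client), the kvno tool from krb5-workstation can be used; a minimal sketch:

# Requests a service ticket for the HiveServer2 principal and caches it
kvno hadoop/[email protected]
# The new service ticket should now appear in the cache
klist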

4.3 DBeaver

The following uses Windows as the example environment.

4.3.1 Download and unpack

Download from https://dbeaver.io/download/, e.g. the portable Windows 64 bit (zip) build.

wget https://dbeaver.io/files/dbeaver-ce-latest-win32.win32.x86_64.zip

# Unpack. The portable build can be used directly after unpacking
unzip dbeaver-ce-latest-win32.win32.x86_64.zip

4.3.2 Edit dbeaver.ini

# Add the following parameters to dbeaver/dbeaver.ini
-Djavax.security.auth.useSubjectCredsOnly=false
-Djava.security.krb5.conf=C:\ProgramData\MIT\Kerberos5\krb5.ini
-Dsun.security.krb5.debug=true
#-Djava.security.auth.login.config=C:\ProgramData\MIT\Kerberos5\jaas.conf

4.3.3 Get the Hive driver - export the Hive JDBC driver from a Maven project

Prerequisite: the JDK and Maven are installed. Create a new directory as the project root and add a pom.xml with the content below. The project's GAV values can be whatever you like; the key point is to declare the hive-jdbc dependency in the version you need, e.g. Hive JDBC 3.1.2.

<?xml version="1.0" encoding="utf-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">  
  <modelVersion>4.0.0</modelVersion>  
  <artifactId>dbeaver-hive-driver</artifactId>  
  <groupId>com.yore</groupId>  
  <version>1.0.0-SNAPSHOT</version>  
  <packaging>jar</packaging>  
  <dependencies> 
    <!-- https://mvnrepository.com/artifact/org.apache.hive/hive-jdbc -->  
    <dependency> 
      <groupId>org.apache.hive</groupId>  
      <artifactId>hive-jdbc</artifactId>  
      <version>3.1.2</version> 
    </dependency> 
  </dependencies> 
</project>

Then run the following command to export the hive-jdbc dependencies into a folder:

mvn dependency:copy-dependencies -DoutputDirectory=./output
# Package it up for easier transfer
tar -zcf dbeaver-hive-driver.tar.gz ./output

Open DBeaver, create a new Hive connection, edit the driver, and set mainly the following:

  • URL template: jdbc:hive2://{host}[:{port}][/{database}];principal=hadoop/{host}@YORE.COM
  • Add folder: the folder exported in the previous step
  • Find driver class: org.apache.hive.jdbc.HiveDriver

4.3.4 Get the Hive driver - download a generic Hive driver package

Alternatively, download the driver package DBeaver recommends by default, hive-jdbc-uber-2.6.5.0-292.jar, load it, and create the Hive connection with the settings below (note principal=hadoop/{host}@YORE.COM: hadoop is the service identity configured in hive-site.xml and must not be replaced with anything else).

  • JDBC URL: jdbc:hive2://bigdata01:10000/default;principal=hadoop/[email protected]
  • Host: bigdata01
  • Port: 10000
  • Database/schema: default

4.4 Kettle

4.4.1 Download and big data plugin configuration

# 1 Download (visit one of the links below and pick a version)
# Either https://sourceforge.net/projects/pentaho/files/Data%20Integration/
# or, recommended here, http://mirror.bit.edu.cn/pentaho/

# 2 Download version 6.1
# Visit http://mirror.bit.edu.cn/pentaho/Data%20Integration/6.1/ and download the 6.1 archive
wget http://mirror.bit.edu.cn/pentaho/Data%20Integration/6.1/pdi-ce-6.1.0.1-196.zip

# 3 Unpack
# It unpacks into a data-integration folder
unzip pdi-ce-6.1.0.1-196.zip

# 4 pentaho-big-data-plugin
# The big data support plugin lives at data-integration/plugins/pentaho-big-data-plugin
# 4.1 Configure which Hadoop distribution to use
##  Edit data-integration/plugins/pentaho-big-data-plugin/plugin.properties
##  The hadoop-configurations folder underneath offers: cdh55, emr310, hdp23, mapr410
##  Using cdh55 as the example, set the two properties below in plugin.properties; otherwise the Hive connection, for instance, will not find a suitable driver
#     active.hadoop.configuration=cdh55
#     hadoop.configurations.path=hadoop-configurations
# 4.2 Accessing Hadoop first requires core-site.xml:
#   copy $HADOOP_HOME/etc/hadoop/core-site.xml from the production cluster
#    into data-integration/plugins/pentaho-big-data-plugin/hadoop-configurations/cdh55
#    Mind the encoding; keep it UTF-8


# 5 Start
## Before starting, modify the startup script for your OS as described in the next section, then start
## 5.1 On Windows, double-click data-integration\Spoon.bat
## 5.2 On Linux, run: sh data-integration/spoon.sh

4.4.2 Modify the Kettle startup script

Note that the principal used to access Hive on the application server side is always hadoop/[email protected], not the user's own principal; the user's own principal is only used on the client machine to obtain a ticket.

# 1 Create a file kettle.login with the following content and save it
#   e.g. on Windows at C:/ProgramData/MIT/Kerberos5/kettle.login
#        on Linux at /etc/kettle.login
com.sun.security.jgss.initiate{
   com.sun.security.auth.module.Krb5LoginModule required
   useKeyTab=true
   useTicketCache=false
   keyTab="C:/ProgramData/MIT/Kerberos5/testuser.keytab"
   principal="hadoop/[email protected]"
   doNotPrompt=true
   debug=true
   debugNative=true;
};

# 2 Modify the Kettle startup script
## 2.1 On Windows, edit data-integration\Spoon.bat
#  Change the OPT variable (around line 96) to the following.
#  It adds the four parameters below (each quoted, space-separated; append pause as the last line of the script if you want to see the startup output):
#    "-Djava.security.auth.login.config=C:/ProgramData/MIT/Kerberos5/kettle.login"
#    "-Djava.security.krb5.realm=YORE.COM"
#    "-Djava.security.krb5.kdc=192.168.33.9"
#    "-Djavax.security.auth.useSubjectCredsOnly=false"
set OPT=%OPT% %PENTAHO_DI_JAVA_OPTIONS% "-Dhttps.protocols=TLSv1,TLSv1.1,TLSv1.2" "-Djava.library.path=%LIBSPATH%" "-Djava.security.auth.login.config=C:/ProgramData/MIT/Kerberos5/kettle.login"  "-Djava.security.krb5.realm=YORE.COM" "-Djava.security.krb5.kdc=192.168.33.9" "-Djavax.security.auth.useSubjectCredsOnly=false" "-DKETTLE_HOME=%KETTLE_HOME%" "-DKETTLE_REPOSITORY=%KETTLE_REPOSITORY%" "-DKETTLE_USER=%KETTLE_USER%" "-DKETTLE_PASSWORD=%KETTLE_PASSWORD%" "-DKETTLE_PLUGIN_PACKAGES=%KETTLE_PLUGIN_PACKAGES%" "-DKETTLE_LOG_SIZE_LIMIT=%KETTLE_LOG_SIZE_LIMIT%" "-DKETTLE_JNDI_ROOT=%KETTLE_JNDI_ROOT%"


## 2.2 On Linux, edit data-integration/spoon.sh
## Around line 201, similar to Windows, add the same four parameters
OPT="$OPT $PENTAHO_DI_JAVA_OPTIONS -Dhttps.protocols=TLSv1,TLSv1.1,TLSv1.2 -Djava.library.path=$LIBPATH -Djava.security.auth.login.config=/etc/kettle.login -Djava.security.krb5.realm=YORE.COM -Djava.security.krb5.kdc=192.168.33.9 -Djavax.security.auth.useSubjectCredsOnly=false -DKETTLE_HOME=$KETTLE_HOME -DKETTLE_REPOSITORY=$KETTLE_REPOSITORY -DKETTLE_USER=$KETTLE_USER -DKETTLE_PASSWORD=$KETTLE_PASSWORD -DKETTLE_PLUGIN_PACKAGES=$KETTLE_PLUGIN_PACKAGES -DKETTLE_LOG_SIZE_LIMIT=$KETTLE_LOG_SIZE_LIMIT -DKETTLE_JNDI_ROOT=$KETTLE_JNDI_ROOT"

4.4.3 Connect to Hive

  • Connection name: by_yourself
  • Connection type: Hadoop Hive 2
  • Host name: the HiveServer2 address, e.g. bigdata01 or 192.168.33.3
  • Database name: default;principal=hadoop/[email protected]
  • Port: 10000
  • Username: (leave empty)
  • Password: (leave empty)

Append the Kerberos principal to the database name; here it is always hadoop/[email protected], not the user's own testuser/[email protected]. Username and password are not set and can be left empty; no other parameters need to be configured.


