[Original] Installing and Deploying a Hadoop Cluster and Hive on CentOS

Installing and Deploying a Hadoop Cluster on CentOS 7.3

Environment

Server: CentOS 7.3 (CPU: 1 core, Mem: 1 GB, Disk: 40 GB)
Hadoop: 2.8.0
Hive: 2.1.1
JDK: 1.8.0
---
Master: 192.168.10.1
Hadoop-01: 192.168.10.2
Hadoop-02: 192.168.10.3

Unless otherwise noted, run every step on all machines.

Configure the Servers

Update the system

yum clean all
yum update -y

Create a swap file

# create a 3 GB swap file; swap is conventionally sized at about 2x physical memory
dd if=/dev/zero of=/swapfile bs=1024 count=3M
chmod 600 /swapfile
mkswap /swapfile
swapon /swapfile
# persist the swap file across reboots
echo '/swapfile swap swap defaults 0 0' >> /etc/fstab
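
To confirm the swap space is active without rebooting, a quick check:
swapon -s # lists active swap files/devices
free -h   # the Swap row should show about 3G total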

Set the hostnames

# run each of these on the corresponding machine
$ echo master > /etc/hostname # on master
$ echo hadoop-01 > /etc/hostname # on hadoop-01
$ echo hadoop-02 > /etc/hostname # on hadoop-02
$ echo 192.168.10.1 master >> /etc/hosts
$ echo 192.168.10.2 hadoop-01 >> /etc/hosts
$ echo 192.168.10.3 hadoop-02 >> /etc/hosts
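
A quick sanity check that every hostname resolves and answers from every node:
$ for h in master hadoop-01 hadoop-02; do ping -c 1 $h; done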

Install the JDK

# search JDK version list
$ yum search openjdk
$ yum install -y java-1.8.0-openjdk # mind the version number; use the JDK version found in the previous step
$ java -version 
openjdk version "1.8.0_131"
OpenJDK Runtime Environment (build 1.8.0_131-b11)
OpenJDK 64-Bit Server VM (build 25.131-b11, mixed mode)
# note the JDK install path, /usr/lib/jvm/jre-openjdk/; it is needed later for JAVA_HOME
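
If the JDK landed somewhere else on your system, one way to locate the real install path (a minimal sketch; readlink follows the alternatives symlinks that CentOS sets up):
$ readlink -f $(which java)
# strip the trailing /bin/java from the output to get the directory for JAVA_HOME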

Add a user

# add an eshadoop user and put it in the default sudo group (wheel) and the hadoop group
$ groupadd hadoop; adduser eshadoop -G hadoop,wheel
$ passwd eshadoop # set the eshadoop password
$ groups eshadoop
eshadoop: eshadoop wheel hadoop

Configure passwordless SSH between the cluster nodes

# log back in as eshadoop
$ sudo yum install -y openssh-clients openssh-server
$ ssh-keygen -t rsa -C [email protected]
$ ssh-copy-id localhost
$ ssh-copy-id master
$ ssh-copy-id hadoop-01
$ ssh-copy-id hadoop-02

# edit /etc/ssh/sshd_config
# find RSAAuthentication, PubkeyAuthentication, and AuthorizedKeysFile, and remove the leading # from those lines
# the result should look like this:
RSAAuthentication yes
PubkeyAuthentication yes
AuthorizedKeysFile .ssh/authorized_keys
$ sudo service sshd restart
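
To verify that passwordless login works, each of these should print a hostname without asking for a password:
$ for h in localhost master hadoop-01 hadoop-02; do ssh $h hostname; done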

Install Hadoop

# I am using version 2.8.0
$ wget http://mirror.bit.edu.cn/apache/hadoop/common/hadoop-2.8.0/hadoop-2.8.0.tar.gz
$ sudo tar zxvf hadoop-2.8.0.tar.gz -C /usr/local/
$ sudo mv /usr/local/hadoop-2.8.0/ /usr/local/hadoop
$ sudo chown -R eshadoop:hadoop /usr/local/hadoop

Configure environment variables

# I use .bashrc, which only affects the current user. For a system-wide setting, edit /etc/profile instead.
$ vi ~/.bashrc
# append the following to .bashrc
# ----------- .bashrc START ----------
#Set JAVA_HOME
export JAVA_HOME=/usr/lib/jvm/jre-openjdk/
#Set Hadoop related environment variable
export HADOOP_INSTALL=/usr/local/hadoop
#Add bin and sbin directory to PATH
export PATH=$PATH:$HADOOP_INSTALL/bin
export PATH=$PATH:$HADOOP_INSTALL/sbin
#Set few more Hadoop related environment variable
export HADOOP_MAPRED_HOME=$HADOOP_INSTALL
export HADOOP_COMMON_HOME=$HADOOP_INSTALL
export HADOOP_HDFS_HOME=$HADOOP_INSTALL
export YARN_HOME=$HADOOP_INSTALL
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_INSTALL/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_INSTALL/lib"
export HADOOP_HOME=/usr/local/hadoop/
# -------- .bashrc END -----------


$ source ~/.bashrc # apply the new environment
$ hadoop version
Hadoop 2.8.0
Subversion https://git-wip-us.apache.org/repos/asf/hadoop.git -r 91f2b7a13d1e97be65db92ddabc627cc29ac0009
Compiled by jdu on 2017-03-17T04:12Z
Compiled with protoc 2.5.0
From source with checksum 60125541c2b3e266cbf3becc5bda666
This command was run using /usr/local/hadoop/share/hadoop/common/hadoop-common-2.8.0.jar

Configure Hadoop

Configure slaves
$ echo master > /usr/local/hadoop/etc/hadoop/slaves
$ echo hadoop-01 >> /usr/local/hadoop/etc/hadoop/slaves
$ echo hadoop-02 >> /usr/local/hadoop/etc/hadoop/slaves
Configure hdfs-site.xml
# replace the <configuration/> element in hdfs-site.xml with the following XML
$ vi /usr/local/hadoop/etc/hadoop/hdfs-site.xml
    <configuration>
        <property>
            <name>dfs.namenode.secondary.http-address</name>
            <value>master:9001</value>
        </property>
        <property>
           <name>dfs.namenode.name.dir</name>
           <value>file:/home/eshadoop/hdfs/tmp/dfs/name</value>
        </property>
        <property>
            <name>dfs.datanode.data.dir</name>
            <value>file:/home/eshadoop/hdfs/tmp/dfs/data</value>
        </property>
        <property>
          <name>dfs.replication</name>
          <value>3</value>
        </property>
    </configuration>
Configure mapred-site.xml
# mapred-site.xml does not ship by default; create it from the bundled template first
$ cp /usr/local/hadoop/etc/hadoop/mapred-site.xml.template /usr/local/hadoop/etc/hadoop/mapred-site.xml
$ vi /usr/local/hadoop/etc/hadoop/mapred-site.xml
# replace the <configuration/> element with the following
    <configuration>
            <property>
                    <name>mapreduce.framework.name</name>
                    <value>yarn</value>
            </property>
            <property>
                    <name>mapreduce.jobhistory.address</name>
                    <value>master:10020</value>
            </property>
            <property>
                    <name>mapreduce.jobhistory.webapp.address</name>
                    <value>master:19888</value>
            </property>
    </configuration>
Configure yarn-site.xml
$ vi /usr/local/hadoop/etc/hadoop/yarn-site.xml
    <configuration>
        <property>
          <name>yarn.nodemanager.aux-services</name>
          <value>mapreduce_shuffle</value>
        </property>
        <property>
          <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
          <value>org.apache.hadoop.mapred.ShuffleHandler</value>
        </property>
        <property>
          <name>yarn.resourcemanager.address</name>
          <value>master:8032</value>
        </property>
        <property>
          <name>yarn.resourcemanager.scheduler.address</name>
          <value>master:8030</value>
        </property>
        <property>
          <name>yarn.resourcemanager.resource-tracker.address</name>
          <value>master:8035</value>
        </property>
        <property>
          <name>yarn.resourcemanager.admin.address</name>
          <value>master:8033</value>
        </property>
        <property>
          <name>yarn.resourcemanager.webapp.address</name>
          <value>master:8088</value>
        </property>
    </configuration>
Configure core-site.xml
$ vi /usr/local/hadoop/etc/hadoop/core-site.xml
# Note: the eshadoop in hadoop.proxyuser.eshadoop.* must match the owner of the Hadoop installation! Otherwise Hive2 will fail later when logging in, creating tables, or loading data.
    <configuration>
        <property>
            <name>hadoop.tmp.dir</name>
            <value>/home/eshadoop/hdfs/tmp</value>
            <description>A base for other temporary directories.</description>
        </property>
        <property>
            <name>fs.default.name</name>
            <value>hdfs://master:9000</value>
        </property>
        <property>
            <name>hadoop.proxyuser.eshadoop.hosts</name>
            <value>*</value>
        </property>
        <property>
            <name>hadoop.proxyuser.eshadoop.groups</name>
            <value>*</value>
            </property>
    </configuration>
$ mkdir -p ~/hdfs/tmp/

At this point Hadoop is fully configured.
Important:
Keep fs.default.name in core-site.xml set to hdfs://master:9000 on every machine; otherwise the DataNodes will not show up in the web UI.
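
If the configuration files were edited on master only, one way to push them to the other nodes is scp over the passwordless SSH set up earlier (a sketch, assuming identical install paths on all machines):
$ for h in hadoop-01 hadoop-02; do scp /usr/local/hadoop/etc/hadoop/{core,hdfs,mapred,yarn}-site.xml $h:/usr/local/hadoop/etc/hadoop/; done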

Start Hadoop

# run these on master only
$ hadoop namenode -format
$ start-all.sh # the script lives in /usr/local/hadoop/sbin/
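
jps is a quick way to confirm the daemons came up (jps ships with the java-1.8.0-openjdk-devel package, which may need to be installed separately). Since master is also listed in slaves, the expected output on this layout would be roughly:
$ jps
# on master: NameNode, SecondaryNameNode, ResourceManager, DataNode, NodeManager
# on hadoop-01 / hadoop-02: DataNode, NodeManager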

Install Hive

 $ wget https://mirrors.tuna.tsinghua.edu.cn/apache/hive/hive-2.1.1/apache-hive-2.1.1-bin.tar.gz
 $ sudo tar zxvf apache-hive-2.1.1-bin.tar.gz -C /usr/local/
 $ sudo mv /usr/local/apache-hive-2.1.1-bin /usr/local/hive
 $ sudo chown -R eshadoop /usr/local/hive
Configure environment variables
$ vi ~/.bashrc
# append the following to .bashrc
# ----------- .bashrc START ----------
# config hive
export HIVE_HOME=/usr/local/hive
export PATH=$PATH:$HIVE_HOME/bin
export CLASSPATH=$CLASSPATH:/usr/local/hive/lib/*:.
export HADOOP_USER_CLASSPATH_FIRST=true
# export HIVE_AUX_JARS_PATH=/opt/lib/elasticsearch-hadoop-2.1.1.jar
# -----------  .bashrc END  ----------
$ source ~/.bashrc # apply the changes
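The hive binary should now be on PATH; a quick check:
$ hive --version # should print the Hive 2.1.1 version banner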
Configure hive-env.sh
$ cp $HIVE_HOME/conf/hive-env.sh.template $HIVE_HOME/conf/hive-env.sh
$ vi $HIVE_HOME/conf/hive-env.sh
# ----------- hive-env.sh START -------------
# set HADOOP_HOME
HADOOP_HOME=/usr/local/hadoop
# -----------  hive-env.sh END  -------------
Configure hive-log4j2.properties
$ cp $HIVE_HOME/conf/hive-log4j2.properties.template $HIVE_HOME/conf/hive-log4j2.properties
$ vi $HIVE_HOME/conf/hive-log4j2.properties
# set the log directory:
property.hive.log.dir = /hive/log/
Other settings
$ cp $HIVE_HOME/conf/hive-default.xml.template $HIVE_HOME/conf/hive-default.xml
$ cp $HIVE_HOME/conf/hive-site.xml.template $HIVE_HOME/conf/hive-site.xml
# change the following properties in hive-site.xml
hive.exec.scratchdir -- /hive/
hive.exec.local.scratchdir -- /hive/
hive.downloaded.resources.dir -- /hive/sessions/${hive.session.id}_resources
hive.hbase.snapshot.restoredir -- /hive/snapshot/
hive.querylog.location -- /hive/log/querylog
hive.service.metrics.file.location -- /hive/report.json
hive.server2.logging.operation.log.location -- /hive/log/operation_logs
hive.llap.io.allocator.mmap.path -- /hive/llap/
# -- end --
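The /hive/ directories above are not created automatically; a hedged sketch to pre-create them (treating hive.exec.scratchdir as an HDFS path and the others as local paths, which is an assumption based on these properties' usual semantics):
$ sudo mkdir -p /hive/log && sudo chown -R eshadoop /hive
$ hdfs dfs -mkdir -p /hive && hdfs dfs -chmod 733 /hive # HDFS scratch dir; run once HDFS is up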
$ cp $HIVE_HOME/conf/hive-exec-log4j2.properties.template $HIVE_HOME/conf/hive-exec-log4j2.properties
Configure the Hive username and password (this part has some issues and needs further verification)
$ hive --service metastore
$ schematool -initSchema -dbType derby -verbose -userName eshadoop -passWord eshadoop
$ hdfs dfsadmin -refreshSuperUserGroupsConfiguration
$ yarn rmadmin -refreshSuperUserGroupsConfiguration
$ hdfs dfsadmin -fs hdfs://hadoop-01:9000 -refreshSuperUserGroupsConfiguration
$ hdfs dfsadmin -fs hdfs://hadoop-02:9000 -refreshSuperUserGroupsConfiguration
$ hdfs dfsadmin -fs hdfs://master:9000 -refreshSuperUserGroupsConfiguration
$ stop-all.sh
$ start-all.sh

Set hive.server2.enable.doAs to false in hive-site.xml to allow anonymous connections.

Start the HiveServer2 server
$ hive --service hiveserver2
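
HiveServer2 can take a while before it accepts connections; one way to confirm it is listening on the default port 10000:
$ ss -tlnp | grep 10000 # or: netstat -tlnp | grep 10000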
Test a HiveServer2 client
# create a test file to load data from
$ echo 1 a >> /tmp/test_user.csv
$ echo 2 b >> /tmp/test_user.csv
$ echo 3 c >> /tmp/test_user.csv
$ echo 4 d >> /tmp/test_user.csv

# Note: the HiveServer2 username and password must match the proxy user configured in Hadoop's core-site.xml.
$ beeline -u jdbc:hive2://xxx.xxx.xxx.xxx:10000/default -n eshadoop -p eshadoop
# inside the hive2 session
# create a table with space as the field delimiter (it must match the delimiter in the file being imported)
0: jdbc:hive2://120.25.94.189:10000/default> create table users(id string, name string)  row format delimited fields terminated by " ";
No rows affected (1.269 seconds)
# list all tables
0: jdbc:hive2://120.25.94.189:10000/default> show tables;
+-----------+--+
| tab_name  |
+-----------+--+
| users     |
+-----------+--+
# load data from the file
0: jdbc:hive2://120.25.94.189:10000/default> load data local inpath "/tmp/test_user.csv" into table users;
No rows affected (1.979 seconds)
# query the data
0: jdbc:hive2://120.25.94.189:10000/default> select * from users;
+-----------+-------------+--+
| users.id  | users.name  |
+-----------+-------------+--+
| 1         | a           |
| 2         | b           |
| 3         | c           |
| 4         | d           |
+-----------+-------------+--+
4 rows selected (1.647 seconds)
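
Since users is a managed table, the loaded file should now sit under the Hive warehouse directory on HDFS. A quick check (assuming the default warehouse location, since hive.metastore.warehouse.dir was not changed above):
$ hdfs dfs -ls /user/hive/warehouse/users/
$ hdfs dfs -cat /user/hive/warehouse/users/test_user.csv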
