Step1:集羣規劃
Step2:設置hosts
Step3:關閉防火牆
Step4:關閉Selinux
Step5:免密鑰登陸
Step6:安裝jdk
#Extract the JDK archive
tar -xvf jdk-8u131-linux-x64.tar.gz
mv jdk1.8.0_131 /usr/local/jdk1.8
#Set environment variables: append the lines below to /etc/profile.
#'export' is required — without it the variables are not inherited by
#child processes (e.g. the Hadoop/ZooKeeper start scripts).
vim /etc/profile
export JAVA_HOME=/usr/local/jdk1.8
export JAVA_BIN=$JAVA_HOME/bin
export JRE_HOME=$JAVA_HOME/jre
export PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin
export CLASSPATH=$JRE_HOME/lib:$JAVA_HOME/lib:$JRE_HOME/lib/charsets.jar
#Reload the profile so the current shell picks up the new variables
source /etc/profile
Step7:安裝zookeeper
#Download
#NOTE(review): old releases are moved off the dist mirrors to
#archive.apache.org/dist/zookeeper/ — verify this URL still resolves
[root@sl-opencron src]# wget http://www-eu.apache.org/dist/zookeeper/zookeeper-3.4.10/zookeeper-3.4.10.tar.gz
#Extract
[root@sl-opencron src]# tar -xvf zookeeper-3.4.10.tar.gz
#Move the extracted directory to /usr/local/
[root@sl-opencron src]# mv zookeeper-3.4.10 /usr/local/zookeeper
Step7.1:配置zookeeper
cd /usr/local/zookeeper/conf/
#Rename zoo_sample.cfg (the sample configuration file) to zoo.cfg
mv zoo_sample.cfg zoo.cfg
#Edit the configuration file
[root@sl-opencron conf]# vim zoo.cfg
***********
***********
#Path is customizable — it must exist on every node (created in Step7.2)
dataDir=/data/zookeeper
#server.N=host:peerPort:electionPort — N must match the myid on that host
server.1=139.162.53.29:2888:3888
server.2=139.162.53.146:2888:3888
server.3=139.162.8.37:2888:3888
Step7.2:生成myid文件
#Create the ZooKeeper data directory and write this node's id.
#Use 'mkdir -p' and '>' (not '>>') so the step is idempotent: re-running
#the original '>>' would append a second line and ZooKeeper would fail
#to parse the myid file.
mkdir -p /data/zookeeper
echo "1" > /data/zookeeper/myid
說明:cdh-1 myid是1 cdh-2 myid是2 cdh-3 myid是3
Step7.3:啓動zookeeper集羣
說明:分別在cdh-1 cdh-2 cdh-3
cd /usr/local/zookeeper/bin
#Start the ZooKeeper daemon (repeat on every ZK node)
./zkServer.sh start
Step8:安裝hadoop
#Download
wget http://www-eu.apache.org/dist/hadoop/common/hadoop-2.7.6/hadoop-2.7.6.tar.gz
#Extract
tar -xvf hadoop-2.7.6.tar.gz
#Move the extracted directory to /usr/local/
mv hadoop-2.7.6 /usr/local/hadoop
#Enter the hadoop directory
cd /usr/local/hadoop
#Create working directories; '-p' creates parent dirs and makes the
#step safe to re-run (plain 'mkdir' fails if a directory already exists)
[root@hadooop-master hadoop]# mkdir -p tmp dfs/data dfs/name
Step8.1:配置hadoop
#Hadoop config files live under etc/hadoop/, not the hadoop root
#(the previous step left us in /usr/local/hadoop)
vim etc/hadoop/core-site.xml
<configuration>
<!-- Set the default filesystem to HDFS nameservice ns1 -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns1/</value>
</property>
<!-- Hadoop temporary directory (created in the previous step) -->
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop/tmp</value>
</property>
<!-- ZooKeeper quorum used for HA automatic failover -->
<property>
<name>ha.zookeeper.quorum</name>
<value>cdh-1:2181,cdh-2:2181,cdh-3:2181</value>
</property>
</configuration>
vim etc/hadoop/hdfs-site.xml
<configuration>
<!-- Nameservice id for HDFS; must match fs.defaultFS in core-site.xml -->
<property>
<name>dfs.nameservices</name>
<value>ns1</value>
</property>
<!-- ns1 has two NameNodes: nn1 and nn2 -->
<property>
<name>dfs.ha.namenodes.ns1</name>
<value>nn1,nn2</value>
</property>
<!-- RPC address of nn1 -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn1</name>
<value>cdh-4:9000</value>
</property>
<!-- HTTP (web UI) address of nn1 -->
<property>
<name>dfs.namenode.http-address.ns1.nn1</name>
<value>cdh-4:50070</value>
</property>
<!-- RPC address of nn2 -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn2</name>
<value>cdh-5:9000</value>
</property>
<!-- HTTP (web UI) address of nn2 -->
<property>
<name>dfs.namenode.http-address.ns1.nn2</name>
<value>cdh-5:50070</value>
</property>
<!-- Where the shared NameNode edit log is stored on the JournalNodes -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://cdh-1:8485;cdh-2:8485;cdh-3:8485/ns1</value>
</property>
<!-- Local disk location where each JournalNode keeps its data -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/usr/local/hadoop/dfs/data</value>
</property>
<!-- Enable automatic NameNode failover -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- Proxy provider clients use to locate the active NameNode -->
<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing methods; multiple methods are separated by newlines, one per line -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>
sshfence
shell(/bin/true)
</value>
</property>
<!-- sshfence requires passwordless ssh between the NameNodes -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<!-- Timeout for the sshfence mechanism (ms) -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
<!-- NOTE(review): client-side IPC setting, conventionally placed in
     core-site.xml; harmless here but verify it takes effect.
     FIX: the original closed this element with '<property>' instead of
     '</property>', producing malformed XML that Hadoop cannot parse. -->
<property>
<name>ipc.client.connect.max.retries</name>
<value>30</value>
</property>
</configuration>
#FIX: the file must be named yarn-site.xml — Hadoop never reads a file
#called "yarn.site.xml", so the original HA settings were silently ignored
vim etc/hadoop/yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<!-- Enable ResourceManager HA -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- Cluster id of the RM pair -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yarn-cluster-rm</value>
</property>
<!-- Logical ids of the two ResourceManagers -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- Hostname of each ResourceManager -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>cdh-4</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>cdh-5</value>
</property>
<!-- ZooKeeper quorum used by the RMs for state storage / leader election -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>cdh-1:2181,cdh-2:2181,cdh-3:2181</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
#Hadoop 2.x ships only a template for this file — create it first,
#otherwise vim opens a new empty buffer outside etc/hadoop/
cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml
vim etc/hadoop/mapred-site.xml
<configuration>
<!-- Run MapReduce jobs on YARN -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
#Worker list: one DataNode/NodeManager hostname per line
#(file lives under etc/hadoop/, same as the other config files)
vim etc/hadoop/slaves
cdh-1
cdh-2
cdh-3
Step8.2:同步到每臺服務器
#Push the finished configuration directory to every node in the cluster
#(run from the node where the files were edited)
rsync -av /usr/local/hadoop/etc/ cdh-1:/usr/local/hadoop/etc/
rsync -av /usr/local/hadoop/etc/ cdh-2:/usr/local/hadoop/etc/
rsync -av /usr/local/hadoop/etc/ cdh-3:/usr/local/hadoop/etc/
rsync -av /usr/local/hadoop/etc/ cdh-4:/usr/local/hadoop/etc/
rsync -av /usr/local/hadoop/etc/ cdh-5:/usr/local/hadoop/etc/
Step9:啓動journalnode
說明:分別在cdh-1 cdh-2 cdh-3啓動
cd /usr/local/hadoop/sbin/
./hadoop-daemon.sh start journalnode
Step10:格式化HDFS
說明:在cdh-4操作
cd /usr/local/hadoop/bin/
./hdfs namenode -format
#注意:格式化之後須要把tmp文件夾拷給cdh-5(不然cdh-5的namenode起不來)
cd /usr/local/hadoop/
scp -r tmp/ cdh-5:/usr/local/hadoop/
VERSION 100% 207 222.7KB/s 00:00
fsimage_0000000000000000000.md5 100% 62 11.3KB/s 00:00
fsimage_0000000000000000000 100% 321 327.3KB/s 00:00
seen_txid 100% 2 1.4KB/s 00:00
Step11:格式化ZKFC
說明:在cdh-4操作
cd /usr/local/hadoop/bin/
./hdfs zkfc -formatZK
Step11.5:啓動HDFS
說明:在cdh-4操作(start-dfs.sh會啓動兩個NameNode、所有DataNode和ZKFC;缺少此步驟後面Step14的jps不會出現NameNode/DataNode進程)
cd /usr/local/hadoop/sbin/
./start-dfs.sh
Step12:啓動yarn
說明:在cdh-4操作
cd /usr/local/hadoop/sbin/
./start-yarn.sh
Step13:cdh-5的resourcemanager須要手動單獨啓動
cd /usr/local/hadoop/sbin/
./yarn-daemon.sh start resourcemanager
Step14:查看集羣進程
[root@cdh-1 ~]# jps
26754 QuorumPeerMain
22387 JournalNode
5286 Jps
4824 NodeManager
25752 DataNode
[root@cdh-2 ~]# jps
4640 JournalNode
29520 QuorumPeerMain
5799 Jps
4839 DataNode
5642 NodeManager
[root@cdh-3 ~]# jps
28738 JournalNode
28898 DataNode
29363 NodeManager
20836 QuorumPeerMain
29515 Jps
[root@cdh-4 ~]# jps
21491 Jps
21334 NameNode
20167 DFSZKFailoverController
21033 ResourceManager
[root@cdh-5 ~]# jps
20403 ResourceManager
20280 NameNode
20523 Jps
19693 DFSZKFailoverController
Step15:測試高可用集羣
說明:在瀏覽器分別訪問兩個NameNode的50070頁面,可見cdh-5的狀態是active,cdh-4的狀態是standby
Step15.1: 把cdh-5 Namenode stop 掉
[root@cdh-5 hadoop]# cd /usr/local/hadoop/sbin/
[root@cdh-5 sbin]# ./hadoop-daemon.sh stop namenode
stopping namenode
[root@cdh-5 sbin]# ./hadoop-daemon.sh start namenode
說明:將cdh-5 Namenode stop 掉,刷新cdh-4 可以看到現在cdh-4的狀態是active ,cdh-5 的狀態是standby