hadoop HA
一、原理
HA高可用
存儲
當Active NameNode的editlog發生變化時,會直接寫入JournalNode集羣;Standby NameNode再從JournalNode讀取這些editlog,以保持與Active NameNode的元數據同步
二、安裝部署
step1、配置zookeeper
step2、配置hadoop
hadoop-env.sh
core-site.xml
<configuration>
  <!-- Default filesystem URI: points at the logical nameservice "mycluster", not at a single NameNode host -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://mycluster</value>
  </property>

  <!-- Base directory under which Hadoop keeps its local working data -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/modules/cdh/hadoop-2.5.0-cdh5.3.6/data</value>
  </property>
</configuration>
zookeeper地址(加入core-site.xml的configuration標籤內)
<!-- Comma-separated host:port list of the ZooKeeper servers used for HA coordination -->
<property>
  <name>ha.zookeeper.quorum</name>
  <value>hlh001:2181,hlh002:2181,hlh003:2181</value>
</property>
hdfs-site.xml
<configuration>
  <!--
    HDFS HA settings for the logical nameservice "mycluster" with two NameNodes (nn1, nn2)
    sharing edits through a JournalNode quorum.
    NOTE(review): the hosts in this block are z04/z05/z06, while the ZooKeeper and YARN
    settings in these notes use hlh001..hlh003; confirm which host names are current.
  -->

  <!-- Number of replicas kept for each block -->
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>

  <!-- Logical name of the HA nameservice -->
  <property>
    <name>dfs.nameservices</name>
    <value>mycluster</value>
  </property>

  <!-- IDs of the NameNodes that belong to the nameservice -->
  <property>
    <name>dfs.ha.namenodes.mycluster</name>
    <value>nn1,nn2</value>
  </property>

  <!-- RPC address of nn1 -->
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn1</name>
    <value>z04:8020</value>
  </property>

  <!-- RPC address of nn2 -->
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn2</name>
    <value>z05:8020</value>
  </property>

  <!-- HTTP (web UI) address of nn1 -->
  <property>
    <name>dfs.namenode.http-address.mycluster.nn1</name>
    <value>z04:50070</value>
  </property>

  <!-- HTTP (web UI) address of nn2 -->
  <property>
    <name>dfs.namenode.http-address.mycluster.nn2</name>
    <value>z05:50070</value>
  </property>

  <!-- JournalNode quorum URI where the NameNodes write and read the shared edit log -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://z04:8485;z05:8485;z06:8485/mycluster</value>
  </property>

  <!-- Fencing method: guarantees that only one NameNode serves clients at any moment -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>

  <!-- sshfence needs passwordless SSH; this is the private key it logs in with -->
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/z/.ssh/id_rsa</value>
  </property>

  <!-- Local directory where each JournalNode stores its edits -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/opt/modules/cdh/hadoop-2.5.0-cdh5.3.6/data/jn</value>
  </property>

  <!--
    Disable HDFS permission checking.
    The key is dfs.permissions.enabled (with the trailing "d"); the former spelling
    "dfs.permissions.enable" is not a Hadoop property and was silently ignored,
    which left permission checking switched on.
  -->
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
  </property>

  <!-- Class HDFS clients use to find the active NameNode of mycluster and fail over automatically -->
  <property>
    <name>dfs.client.failover.proxy.provider.mycluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
</configuration>
故障自動轉移(加入hdfs-site.xml的configuration標籤內)
<!-- Turn on automatic NameNode failover -->
<property>
  <name>dfs.ha.automatic-failover.enabled</name>
  <value>true</value>
</property>
yarn-site.xml
<configuration>
  <!-- Site-specific YARN configuration properties -->

  <!-- Auxiliary service run by each NodeManager for the MapReduce shuffle -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>

  <!-- Enable log aggregation -->
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>

  <!-- URL of the log server (job history) used to view aggregated logs -->
  <property>
    <name>yarn.log.server.url</name>
    <value>http://z01:19888/jobhistory/logs/</value>
  </property>

  <!-- Keep aggregated logs for 86400 seconds (24 hours) -->
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>86400</value>
  </property>

  <!-- Enable ResourceManager HA -->
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>

  <!-- Cluster ID, followed by the two ResourceManagers (rm1, rm2) and their hosts -->
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>cluster-yarn1</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>hlh002</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>hlh003</value>
  </property>

  <!-- Address list of the ZooKeeper ensemble -->
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>hlh001:2181,hlh002:2181,hlh003:2181</value>
  </property>

  <!-- Enable ResourceManager recovery -->
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>

  <!-- Store ResourceManager state in the ZooKeeper ensemble -->
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
</configuration>