1. Hardware and software configuration
1.1 Host configuration and planning
hostname | hp5 | hp6 | hp7 |
---|---|---|---|
ip | 10.31.1.119 | 10.31.1.120 | 10.31.1.121 |
Memory | 8G | 8G | 8G |
username | root | root | root |
IP | Hostname | Roles |
---|---|---|
10.31.1.119 | hp5 | NameNode DataNode ResourceManager NodeManager |
10.31.1.120 | hp6 | DataNode NodeManager |
10.31.1.121 | hp7 | SecondaryNameNode DataNode NodeManager |
1.2 Software configuration
Software | Version |
---|---|
CentOS | 7.8 |
Java | JDK 8 |
Hadoop | 3.3.2 |
We initially used OpenJDK 11, but ran into problems later when installing Hive and Hudi; switching back to JDK 8 resolved them.
1.3 Install common tools
yum install -y epel-release
yum install -y net-tools
yum install -y vim
2. Pre-installation preparation
2.1 Set the hostname
hostname hp5 #set a temporary hostname (use hp6/hp7 on the other nodes)
vi /etc/hostname #set the permanent hostname
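On CentOS 7, hostnamectl sets both the temporary and the permanent hostname in one step; a minimal alternative sketch, assuming the node being configured is hp5:
hostnamectl set-hostname hp5
hostname #verify the new hostname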
2.2 Configure /etc/hosts
10.31.1.119 hp5
10.31.1.120 hp6
10.31.1.121 hp7
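A quick way to append these entries on every node (a sketch; skip it if /etc/hosts already contains them):
cat >> /etc/hosts <<'EOF'
10.31.1.119 hp5
10.31.1.120 hp6
10.31.1.121 hp7
EOF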
2.3 Disable the firewall
systemctl stop firewalld #stop the firewall
systemctl disable firewalld #disable it at boot
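To confirm the firewall is really off, a quick check (not required):
systemctl status firewalld
firewall-cmd --state #should report "not running"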
2.4 Passwordless SSH login
#Run on every node (all nodes)
ssh-keygen -t rsa #press Enter through all prompts
#Append the generated public key to authorized_keys (master node)
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
#Copy the public key to the two slave nodes (master node)
scp ~/.ssh/id_rsa.pub root@hp6:~/
scp ~/.ssh/id_rsa.pub root@hp7:~/
#On each slave node, append the master's public key to authorized_keys (non-master nodes)
cat ~/id_rsa.pub >> ~/.ssh/authorized_keys
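ssh-copy-id does the copy and the append in a single step, and a loop from hp5 makes a quick check; a sketch:
ssh-copy-id root@hp6
ssh-copy-id root@hp7
for h in hp5 hp6 hp7; do ssh $h hostname; done #should print each hostname without prompting for a password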
2.5 Time synchronization with ntpdate
yum -y install ntpdate
#Synchronize the time once
ntpdate -u ntp.sjtu.edu.cn
#Schedule a sync at the top of every hour
vi /etc/crontab
0 */1 * * * root ntpdate -u ntp.sjtu.edu.cn
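To check the result without adjusting the clock, ntpdate can run in query-only mode (a sketch):
date
ntpdate -q ntp.sjtu.edu.cn #query only, shows the offset without changing the time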
3. Installation
3.1 Install Hadoop
3.1.1 Download and extract Hadoop
cd /home/software
#Download
wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-3.3.2/hadoop-3.3.2.tar.gz --no-check-certificate
#Extract
tar -zxvf hadoop-3.3.2.tar.gz -C /home
3.1.2 Configure the HADOOP_HOME environment variable
vi /etc/profile
export HADOOP_HOME=/home/hadoop-3.3.2
source /etc/profile
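Optionally, putting the bin and sbin directories on PATH in the same /etc/profile avoids having to cd into them later (a convenience sketch, not required by the rest of this guide):
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin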
3.1.3 Edit etc/hadoop/hadoop-env.sh
cd /home/hadoop-3.3.2/etc/hadoop/
vi hadoop-env.sh
#Add the following lines
export JAVA_HOME=/usr/local/java/jdk1.8.0_211
export HADOOP_HOME=/home/hadoop-3.3.2
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
export HADOOP_PID_DIR=${HADOOP_HOME}/pid
3.1.4 Edit etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://hp5:8020</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop_repo</value>
</property>
</configuration>
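Hadoop normally creates hadoop.tmp.dir on first use, but creating it up front on every node avoids permission surprises (a sketch, assuming the path configured above):
mkdir -p /home/hadoop_repo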
3.1.5 Edit etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hp5:50090</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/home/hadoop-3.3.2/tmp/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/home/hadoop-3.3.2/tmp/dfs/data</value>
</property>
</configuration>
3.1.6 Edit etc/hadoop/mapred-site.xml
<!-- Change the configuration to the following -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>hp5:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hp5:19888</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=/home/hadoop-3.3.2</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=/home/hadoop-3.3.2</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=/home/hadoop-3.3.2</value>
</property>
</configuration>
3.1.7 Edit etc/hadoop/yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hp5</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://hp5:19888/jobhistory/logs/</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>8192</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>8</value>
</property>
<!-- Minimum memory allocated to each container -->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>128</value>
</property>
</configuration>
3.1.8 Edit etc/hadoop/workers
hp5
hp6
hp7
3.1.9 Edit the startup scripts
cd /home/hadoop-3.3.2/sbin
# Add the following at the very top of each script
vi start-dfs.sh
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
vi stop-dfs.sh
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
vi start-yarn.sh
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
vi stop-yarn.sh
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
3.1.10 Format HDFS
#Copy the hadoop directory to the other nodes
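#(a sketch, assuming the same /home layout on every node)
scp -r /home/hadoop-3.3.2 root@hp6:/home/
scp -r /home/hadoop-3.3.2 root@hp7:/home/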
#Format the NameNode
cd /home/hadoop-3.3.2/bin
./hdfs namenode -format
#The following message indicates that the format succeeded
common.Storage: Storage directory /home/hadoop-3.3.2/tmp/dfs/name has been successfully formatted.
3.1.11 Start the cluster
cd /home/hadoop-3.3.2/sbin
./start-all.sh #start the cluster
./stop-all.sh #stop the cluster
cd /home/hadoop-3.3.2/bin/
./mapred --daemon start historyserver #start the JobHistory server on hp5 (the node mapreduce.jobhistory.address points to)
3.1.12 Verification
#jps on the master node (hp5)
NameNode
SecondaryNameNode
ResourceManager
DataNode
NodeManager
#jps on the slave nodes (hp6, hp7)
DataNode
NodeManager
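The same can be confirmed from the command line with the bundled tools; a quick sketch:
cd /home/hadoop-3.3.2/bin
./hdfs dfsadmin -report #lists the live DataNodes and their capacity
./yarn node -list #lists the registered NodeManagers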
3.1.13 View cluster information
#Resource information (YARN ResourceManager web UI)
http://10.31.1.119:8088/
#Storage node information (HDFS NameNode web UI)
http://10.31.1.119:9870/
3.1.14 Viewing logs from the command line
#View the aggregated logs of a YARN application
cd /home/hadoop-3.3.2/bin/
./yarn logs -applicationId application_1639553331593_0001
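The applicationId itself can be looked up by listing applications first (a sketch):
./yarn application -list -appStates ALL #find the application_... id, then pass it to yarn logs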