一、虛擬機設置
1. 搭建虛擬機
2. 確定hostname(master、slave1、slave2)
3. 設置網絡
4. 設置hosts
vi /etc/hosts
10.211.55.10 master
10.211.55.9 slave1
10.211.55.8 slave2
5. 關閉防火牆
service iptables stop
chkconfig iptables off
6. 關閉selinux
vi /etc/selinux/config
7. 配置ssh免密登錄(使用hadoop用戶wachoo)
ssh-keygen -t rsa
ssh-copy-id wachoo@master
ssh-copy-id wachoo@slave1
ssh-copy-id wachoo@slave2
二、JDK1.8安裝
1. 下載安裝包
scp software/jdk-8u121-linux-x64.rpm wachoo@master:/home/wachoo/software
2. 安裝
sudo rpm -ivh jdk-8u121-linux-x64.rpm
3. 設置環境變量
vi ~/.bashrc  # 修改當前用戶自己的配置文件,不需要sudo(用sudo會把文件屬主改壞)
export JAVA_HOME=/usr/java/jdk1.8.0_121
export PATH=$PATH:$JAVA_HOME/bin
三、安裝Hadoop
1. 下載、解壓安裝包
http://archive.apache.org/dist/hadoop/common/hadoop-3.1.3/
sudo wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-3.1.3/hadoop-3.1.3.tar.gz
tar -xvf hadoop-3.1.3.tar.gz -C /usr/hadoop/
2. 配置hadoop環境變量
vi /etc/profile
#在配置文件最後一行添加如下配置
# HADOOP_HOME
export HADOOP_HOME=/usr/hadoop/hadoop-3.1.3
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
# 運行如下命令刷新環境變量
source /etc/profile
# 進行測試是否成功
hadoop version
3. 修改配置文件
#在/usr/hadoop目錄下創建目錄
mkdir /usr/hadoop/tmp /usr/hadoop/mr
mkdir -p /usr/hadoop/hdfs/namenode /usr/hadoop/hdfs/datanode
mkdir -p /usr/hadoop/yarn/nodemanager /usr/hadoop/yarn/logs
cd $HADOOP_HOME/etc/hadoop
# 修改文件如下:
# workers hadoop-env.sh
# core-site.xml hdfs-site.xml yarn-site.xml mapred-site.xml
#1. 刪除localhost,添加從節點主機名,例如:
vi workers
slave1
slave2
#2. 在 #JAVA_HOME=/usr/java/testing hdfs dfs -ls一行下面添加如下代碼
vi hadoop-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_121
export HADOOP_HOME=/usr/hadoop/hadoop-3.1.3
export HDFS_NAMENODE_USER=wachoo
export HDFS_DATANODE_USER=wachoo
export HDFS_SECONDARYNAMENODE_USER=wachoo
export YARN_RESOURCEMANAGER_USER=wachoo
export YARN_NODEMANAGER_USER=wachoo
#3.修改core-site.xml
vi core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
<description>namenode節點地址與端口</description>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/hadoop/tmp</value>
<description>臨時文件存儲目錄</description>
</property>
</configuration>
#4.修改hdfs-site.xml
vi hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/hadoop/hdfs/namenode</value>
<description>
Path on the local filesystem where theNameNode stores the namespace and transactions logs persistently.
</description>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/hadoop/hdfs/datanode</value>
<description>
Comma separated list of paths on the localfilesystem of a DataNode where it should store itsblocks.
</description>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master:9001</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
<description>need not permissions</description>
</property>
</configuration>
#5.修改yarn-site.xml
vi yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>512</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>512</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>file:/usr/hadoop/yarn/nodemanager</value>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>file:/usr/hadoop/yarn/logs</value>
</property>
</configuration>
#6.修改mapred-site.xml
vi mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.map.memory.mb</name>
<value>512</value>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx480M</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>512</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx480M</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=/usr/hadoop/hadoop-3.1.3</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=/usr/hadoop/hadoop-3.1.3</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=/usr/hadoop/hadoop-3.1.3</value>
</property>
<!-- 注意:mapreduce.map.memory.mb 與 mapreduce.reduce.memory.mb 已在上方設置,不要重複配置 -->
<property>
<name>mapreduce.application.classpath</name>
<value>
/usr/hadoop/hadoop-3.1.3/etc/hadoop,
/usr/hadoop/hadoop-3.1.3/share/hadoop/common/*,
/usr/hadoop/hadoop-3.1.3/share/hadoop/common/lib/*,
/usr/hadoop/hadoop-3.1.3/share/hadoop/hdfs/*,
/usr/hadoop/hadoop-3.1.3/share/hadoop/hdfs/lib/*,
/usr/hadoop/hadoop-3.1.3/share/hadoop/mapreduce/*,
/usr/hadoop/hadoop-3.1.3/share/hadoop/mapreduce/lib/*,
/usr/hadoop/hadoop-3.1.3/share/hadoop/yarn/*,
/usr/hadoop/hadoop-3.1.3/share/hadoop/yarn/lib/*
</value>
<description>設置MapReduce資源調度類路徑,如果不設置可能會報錯</description>
</property>
</configuration>
4. 啓動(master)
#1. 格式化
hdfs namenode -format
# cd $HADOOP_HOME/sbin
#2. 啓動HDFS
start-dfs.sh
#3. 啓動YARN
start-yarn.sh
# 查看進程
jps
5. 驗證
hdfs dfs -ls /
cd /usr/hadoop/hadoop-3.1.3/share/hadoop/mapreduce
hadoop jar hadoop-mapreduce-examples-3.1.3.jar pi 5 12
6. 重啓清除
# cd $HADOOP_HOME/sbin
#2. 停止全部服務
stop-all.sh
rm -rf /usr/hadoop/tmp/*
hdfs namenode -format
start-all.sh