Hadoop Cluster Setup

 

I. Virtual Machine Setup

1. Create the virtual machines

2. Set the hostnames (master, slave1, slave2)

3. Configure the network

4. Configure /etc/hosts

vi /etc/hosts

10.211.55.10 master
10.211.55.9 slave1
10.211.55.8 slave2
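
A quick sanity check, assuming all three VMs are already up: every node should reach the others by hostname.

# Run on each node; all three names should answer
ping -c 3 master
ping -c 3 slave1
ping -c 3 slave2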

5. Disable the firewall

service iptables stop

chkconfig iptables off
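
The iptables service commands above apply to CentOS 6. On CentOS 7 and later the default firewall is firewalld, so the equivalent would be:

# CentOS 7+ (assuming firewalld is the active firewall)
sudo systemctl stop firewalld
sudo systemctl disable firewalld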

6. Disable SELinux

vi /etc/selinux/config
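
In that file, set SELINUX=disabled; the change takes effect after a reboot, or immediately via setenforce:

# /etc/selinux/config
SELINUX=disabled

# Switch to permissive mode for the current session
sudo setenforce 0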

7. Configure passwordless SSH login (as the hadoop user wachoo)

ssh-keygen -t rsa

ssh-copy-id wachoo@master
ssh-copy-id wachoo@slave1
ssh-copy-id wachoo@slave2
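
If the keys were copied correctly, a login to any node should no longer ask for a password:

# Should print the remote hostname without a password prompt
ssh wachoo@slave1 hostname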

 

II. JDK 1.8 Installation

1. Download the installation package and copy it to the master node

scp software/jdk-8u121-linux-x64.rpm wachoo@master:/home/wachoo/software

2. Install

sudo rpm -ivh jdk-8u121-linux-x64.rpm

3. Set environment variables

vi ~/.bashrc

export JAVA_HOME=/usr/java/jdk1.8.0_121
export PATH=$PATH:$JAVA_HOME/bin
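
Reload the file and confirm the JDK is on the PATH:

source ~/.bashrc
java -version    # should report 1.8.0_121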

 

III. Hadoop Installation

1. Download and unpack the installation package

http://archive.apache.org/dist/hadoop/core/hadoop-3.1.3/

sudo wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/core/hadoop-3.1.3/hadoop-3.1.3.tar.gz

sudo mkdir -p /usr/hadoop
sudo tar -xvf hadoop-3.1.3.tar.gz -C /usr/hadoop/
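
The archive is unpacked as root, so hand the tree over to the hadoop user before going further; a sketch, assuming the wachoo account has a group of the same name:

# Let the hadoop user own the whole tree so the daemons can write to it
sudo chown -R wachoo:wachoo /usr/hadoop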

2. Configure Hadoop environment variables

sudo vi /etc/profile
# Append the following at the end of the file
 
# HADOOP_HOME
export HADOOP_HOME=/usr/hadoop/hadoop-3.1.3
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

# Reload the environment variables
source /etc/profile

# Verify the installation
hadoop version

3. Edit the configuration files

# Create the working directories under /usr/hadoop

mkdir /usr/hadoop/tmp  /usr/hadoop/mr
mkdir -p /usr/hadoop/hdfs/namenode  /usr/hadoop/hdfs/datanode
mkdir -p /usr/hadoop/yarn/nodemanager  /usr/hadoop/yarn/logs
cd $HADOOP_HOME/etc/hadoop

# Files to edit:
# workers, hadoop-env.sh
# core-site.xml, hdfs-site.xml, yarn-site.xml, mapred-site.xml

#1. Remove localhost and add the worker hostnames, for example:
vi workers

slave1
slave2

#2. In hadoop-env.sh, add the following below the line "#JAVA_HOME=/usr/java/testing hdfs dfs -ls"
vi hadoop-env.sh

export JAVA_HOME=/usr/java/jdk1.8.0_121
export HADOOP_HOME=/usr/hadoop/hadoop-3.1.3
export HDFS_NAMENODE_USER=wachoo
export HDFS_DATANODE_USER=wachoo
export HDFS_SECONDARYNAMENODE_USER=wachoo
export YARN_RESOURCEMANAGER_USER=wachoo
export YARN_NODEMANAGER_USER=wachoo

#3. Edit core-site.xml
vi core-site.xml

<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://master:9000</value>
        <description>NameNode address and port</description>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/usr/hadoop/tmp</value>
        <description>Base directory for temporary files</description>
    </property>
</configuration>

#4. Edit hdfs-site.xml
vi hdfs-site.xml

<configuration>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/usr/hadoop/hdfs/namenode</value>
        <description>
            Path on the local filesystem where the NameNode stores the namespace and transaction logs persistently.
        </description>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/usr/hadoop/hdfs/datanode</value>
        <description>
            Comma-separated list of paths on the local filesystem of a DataNode where it should store its blocks.
        </description>
    </property>
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>master:9001</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
        <description>Disable permission checking so any user can read and write HDFS</description>
    </property>
</configuration>

#5. Edit yarn-site.xml
vi yarn-site.xml

<configuration>
    <!-- Site specific YARN configuration properties -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>master</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.scheduler.minimum-allocation-mb</name>
        <value>512</value>
    </property>
    <property>
        <name>yarn.scheduler.maximum-allocation-mb</name>
        <!-- Must cover the largest single container request; the MapReduce
             AM alone asks for 1536 MB by default, so 512 would reject every job -->
        <value>2048</value>
    </property>
    <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>file:/usr/hadoop/yarn/nodemanager</value>
    </property>
    <property>
        <name>yarn.nodemanager.log-dirs</name>
        <value>file:/usr/hadoop/yarn/logs</value>
    </property>
</configuration>


#6. Edit mapred-site.xml
vi mapred-site.xml

<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.map.memory.mb</name>
        <value>512</value>
    </property>
    <property>
        <name>mapreduce.map.java.opts</name>
        <value>-Xmx480M</value>
    </property>
    <property>
        <name>mapreduce.reduce.memory.mb</name>
        <value>512</value>
    </property>
    <property>
        <name>mapreduce.reduce.java.opts</name>
        <value>-Xmx480M</value>
    </property>
    <property>
        <name>yarn.app.mapreduce.am.env</name>
        <value>HADOOP_MAPRED_HOME=/usr/hadoop/hadoop-3.1.3</value>
    </property>
    <property>
        <name>mapreduce.map.env</name>
        <value>HADOOP_MAPRED_HOME=/usr/hadoop/hadoop-3.1.3</value>
    </property>
    <property>
        <name>mapreduce.reduce.env</name>
        <value>HADOOP_MAPRED_HOME=/usr/hadoop/hadoop-3.1.3</value>
    </property>
    <property>
        <name>mapreduce.application.classpath</name>
        <value>
            /usr/hadoop/hadoop-3.1.3/etc/hadoop,
            /usr/hadoop/hadoop-3.1.3/share/hadoop/common/*,
            /usr/hadoop/hadoop-3.1.3/share/hadoop/common/lib/*,
            /usr/hadoop/hadoop-3.1.3/share/hadoop/hdfs/*,
            /usr/hadoop/hadoop-3.1.3/share/hadoop/hdfs/lib/*,
            /usr/hadoop/hadoop-3.1.3/share/hadoop/mapreduce/*,
            /usr/hadoop/hadoop-3.1.3/share/hadoop/mapreduce/lib/*,
            /usr/hadoop/hadoop-3.1.3/share/hadoop/yarn/*,
            /usr/hadoop/hadoop-3.1.3/share/hadoop/yarn/lib/*
        </value>
        <description>Classpath for MapReduce applications; jobs may fail to find their classes if this is left unset</description>
    </property>
</configuration>
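
The same installation and configuration must exist on every node. A sketch, assuming /usr/hadoop already exists and is owned by wachoo on slave1 and slave2 (the JDK install and the /etc/profile change also have to be repeated there):

# Push the fully configured installation to both workers
scp -r /usr/hadoop/hadoop-3.1.3 wachoo@slave1:/usr/hadoop/
scp -r /usr/hadoop/hadoop-3.1.3 wachoo@slave2:/usr/hadoop/

# The working directories from step 3 are needed on the workers too
ssh wachoo@slave1 'mkdir -p /usr/hadoop/tmp /usr/hadoop/hdfs/datanode /usr/hadoop/yarn/nodemanager /usr/hadoop/yarn/logs'
ssh wachoo@slave2 'mkdir -p /usr/hadoop/tmp /usr/hadoop/hdfs/datanode /usr/hadoop/yarn/nodemanager /usr/hadoop/yarn/logs'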

4. Start the cluster (on master)

#1. Format the NameNode

hdfs namenode -format

# cd  $HADOOP_HOME/sbin
#2. Start HDFS

start-dfs.sh

#3. Start YARN

start-yarn.sh

# Check the running Java processes
jps
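
With this configuration, jps should show roughly the following (process IDs omitted); each worker should run a DataNode/NodeManager pair:

# On master
NameNode
SecondaryNameNode
ResourceManager

# On slave1 and slave2
DataNode
NodeManager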

5. Verify

hdfs dfs -ls /


cd $HADOOP_HOME/share/hadoop/mapreduce

hadoop jar hadoop-mapreduce-examples-3.1.3.jar pi 5 12
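
The web UIs are another quick check; in Hadoop 3.x the NameNode UI listens on port 9870 and the ResourceManager UI on 8088 by default:

http://master:9870    # HDFS NameNode UI
http://master:8088    # YARN ResourceManager UI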

6. Wipe and reformat

# cd  $HADOOP_HOME/sbin
#1. Stop all daemons
stop-all.sh

#2. Clear the old data (see the note below for the HDFS data directories)
rm -rf /usr/hadoop/tmp/*

#3. Reformat and restart
hdfs namenode -format

start-all.sh
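
Note that dfs.namenode.name.dir and dfs.datanode.data.dir point under /usr/hadoop/hdfs, so the metadata and block data do not live in tmp; for a clean reformat those directories must be emptied on every node as well, otherwise the DataNodes will be rejected with a clusterID mismatch. A sketch:

rm -rf /usr/hadoop/hdfs/namenode/* /usr/hadoop/hdfs/datanode/*
ssh wachoo@slave1 'rm -rf /usr/hadoop/tmp/* /usr/hadoop/hdfs/datanode/*'
ssh wachoo@slave2 'rm -rf /usr/hadoop/tmp/* /usr/hadoop/hdfs/datanode/*'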

 
