Introduction to Hadoop CDH


1. Machine Environment

    1.1 Configuration:

       #CentOS system — Memory: 31G, CPU: 24 cores, Storage: 2T

       #Three machines:

       A: 10.90.3.81  carcloud81  (namenode)
       B: 10.90.3.82  carcloud82  (datanode)
       C: 10.90.3.83  carcloud83  (datanode)

       #Create the account (the group must exist before useradd -g can reference it):
        groupadd hadoop
        useradd -g hadoop hadoop

       #Directory layout (/data/cloud/, owned by hadoop:hadoop):
hadoop -> /data/cloud/hadoop-2.6.0-cdh5.4.8
hadoop-2.6.0-cdh5.4.8
hive -> /data/cloud/hive-1.1.0-cdh5.4.8
hive-1.1.0-cdh5.4.8
spark -> /data/cloud/spark-1.3.0-cdh5.4.8-bin
spark-1.3.0-cdh5.4.8-bin
zookeeper -> /data/cloud/zookeeper-3.4.5-cdh5.4.8
zookeeper-3.4.5-cdh5.4.8

       #Passwordless SSH:
        A -> B & C
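
        A common way to set this up (a sketch, assuming OpenSSH and the hadoop account on all three machines):

        #On A (10.90.3.81), as the hadoop user:
        ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
        ssh-copy-id hadoop@10.90.3.82
        ssh-copy-id hadoop@10.90.3.83
        ssh hadoop@10.90.3.82 hostname    #should return without a password prompt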

        

  1.2 Software:

      #yum install -y snappy snappy-devel autoconf automake libtool git \
                      gcc gcc-c++ make cmake openssl-devel ncurses-devel \
                      bzip2-devel


       rpm -qa | grep -E 'snappy|autoconf|automake|libtool|git|gcc|openssl|ncurses|bzip2'


    #jdk7(/usr/local)
        http://download.oracle.com/otn-pub/java/jdk/7u79-b15/jdk-7u79-linux-x64.tar.gz
        java -version
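
        A typical unpack-and-link sequence (a sketch; the /usr/local/java symlink matches the JAVA_HOME set in section 2.2):

        cd /usr/local
        tar xvfz jdk-7u79-linux-x64.tar.gz
        ln -s /usr/local/jdk1.7.0_79 /usr/local/java
        /usr/local/java/bin/java -version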


    #maven:(/usr/local/maven)
      http://ftp.cuhk.edu.hk/pub/packages/apache.org/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz
      mvn -version


    #ant(/usr/local/ant)
        http://apache.01link.hk//ant/binaries/apache-ant-1.9.6-bin.tar.gz
        ant -version


    #protobuf(/usr/local/protobuf)
        https://code.google.com/p/protobuf/downloads/list
        protoc --version
        Reference: http://blog.csdn.net/huguoping830623/article/details/45482725
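
        Protobuf is compiled from source; a minimal sketch (assuming protobuf 2.5.0, the version Hadoop 2.6 expects):

        tar xvfz protobuf-2.5.0.tar.gz
        cd protobuf-2.5.0
        ./configure --prefix=/usr/local/protobuf
        make && make install
        /usr/local/protobuf/bin/protoc --version    #expect: libprotoc 2.5.0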


    #snappy(/usr/local/snappy)
        https://github.com/google/snappy/releases/download/1.1.3/snappy-1.1.3.tar.gz
        Reference: http://google.github.io/snappy/
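
        Snappy builds the same way; a minimal sketch against the 1.1.3 tarball linked above:

        tar xvfz snappy-1.1.3.tar.gz
        cd snappy-1.1.3
        ./configure --prefix=/usr/local/snappy
        make && make install
        ls /usr/local/snappy/lib/    #libsnappy.so* should be present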

2. Hadoop Installation

  2.1 Building Hadoop


    #hadoop(/data/src/hadoop-2.6.0-cdh5.4.8)

      http://archive.cloudera.com/cdh5/cdh/5/hadoop-2.6.0-cdh5.4.8-src.tar.gz
      mvn clean package -DskipTests -Pdist,native -Dtar -Dsnappy.lib=/usr/local/snappy/lib/  -Dbundle.snappy
      #build output:
      /data/src/hadoop-2.6.0-cdh5.4.8/src/hadoop-dist/target/hadoop-2.6.0-cdh5.4.8.tar.gz
      cp /data/src/hadoop-2.6.0-cdh5.4.8/src/hadoop-dist/target/hadoop-2.6.0-cdh5.4.8.tar.gz /data/cloud/
      cd /data/cloud/
      tar xvfz hadoop-2.6.0-cdh5.4.8.tar.gz
      ln -s /data/cloud/hadoop-2.6.0-cdh5.4.8 /data/cloud/hadoop
      chown -R hadoop:hadoop /data/cloud/
    

     #hadoop-snappy(/data/src/hadoop-snappy)

      https://github.com/electrum/hadoop-snappy
      mvn package -Dsnappy.prefix=/usr/local/snappy
      #the build drops a tarball under target/; its lib/ holds hadoop-snappy-0.0.1-SNAPSHOT.jar and the native libs:
      ll hadoop-snappy-master/target/hadoop-snappy-0.0.1-SNAPSHOT-tar/hadoop-snappy-0.0.1-SNAPSHOT/lib/
      tar xvfz hadoop-snappy-0.0.1-SNAPSHOT.tar.gz
      cp -r ./hadoop-snappy-0.0.1-SNAPSHOT/lib/* $HADOOP_HOME/lib

     #/data/cloud/hadoop/bin/hadoop checknative -a   (verify the native .so files)
      ll /data/cloud/hadoop/lib/native/
      ll /data/cloud/hadoop/lib/native/Linux-amd64-64/


  2.2 Environment configuration (/etc/profile.d/ice.sh)

#java
export JAVA_HOME=/usr/local/java
export CLASSPATH=$JAVA_HOME/lib:$JAVA_HOME/jre/lib:.

#protobuf
export PROTOBUF_HOME=/usr/local/protobuf

#hadoop
export HADOOP_HOME=/data/cloud/hadoop
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"

#yarn
export YARN_HOME=$HADOOP_HOME
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop

#spark
export SPARK_HOME=/data/cloud/spark
export SPARK_JAR=$SPARK_HOME/spark-assembly-1.3.0-cdh5.4.8-hadoop2.6.0-cdh5.4.8.jar

#scala
export SCALA_HOME=/usr/local/scala

export PATH=$JAVA_HOME/bin:$PROTOBUF_HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH
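
After editing the file, load it into the current shell and spot-check a variable (a quick sanity check; hadoop version also confirms the PATH entries):

source /etc/profile.d/ice.sh
echo $HADOOP_HOME        #expect /data/cloud/hadoop
hadoop version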

2.3 System configuration:

#core-site.xml
<configuration>
       <property>
               <name>fs.defaultFS</name>
               <value>hdfs://carcloud81:9000</value>
       </property>
       <property>
               <name>io.file.buffer.size</name>
               <value>131072</value>
       </property>
       <property>
               <name>hadoop.tmp.dir</name>
               <value>/data/cloud/hadoop/tmp</value>
       </property>
</configuration>


#hdfs-site.xml
<configuration>
        <property>
               <name>dfs.namenode.name.dir</name>
               <value>file:/data/cloud/hadoop/dfs/nnode</value>
        </property>
        <property>
               <name>dfs.datanode.data.dir</name>
               <value>file:/data/cloud/hadoop/dfs/dnode</value>
        </property>
        <property>
               <name>dfs.replication</name>   
               <value>2</value> 
        </property>
</configuration>
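
The local directories referenced in core-site.xml and hdfs-site.xml are not all created automatically; it is safest to create them up front, owned by hadoop (a sketch; nnode matters on 81, dnode on 82/83):

mkdir -p /data/cloud/hadoop/tmp /data/cloud/hadoop/dfs/nnode /data/cloud/hadoop/dfs/dnode
chown -R hadoop:hadoop /data/cloud/hadoop/tmp /data/cloud/hadoop/dfs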
#mapred-site.xml
<configuration>
        <property>
               <name>mapreduce.framework.name</name>
               <value>yarn</value>
        </property>
        <property>
               <name>mapreduce.jobtracker.address</name>
               <value>carcloud81:8021</value>
        </property>
        <property>
               <name>mapreduce.jobhistory.address</name>
               <value>carcloud81:10020</value>
        </property>
        <property>
               <name>mapreduce.jobhistory.webapp.address</name>
               <value>carcloud81:19888</value>
        </property>
        <property>
               <name>mapred.max.maps.per.node</name>
               <value>4</value>
        </property>
        <property>
               <name>mapred.max.reduces.per.node</name>
               <value>2</value>
        </property>
        <property>
               <name>mapreduce.map.memory.mb</name>
               <value>1408</value>
        </property>
        <property>
               <name>mapreduce.map.java.opts</name>
               <value>-Xmx1126M</value>
        </property>
        <property>
               <name>mapreduce.reduce.memory.mb</name>
               <value>2816</value>
        </property>
        <property>
               <name>mapreduce.reduce.java.opts</name>
               <value>-Xmx2252M</value>
        </property>
        <property>
               <name>mapreduce.task.io.sort.mb</name>
               <value>512</value>
        </property>
        <property>
               <name>mapreduce.task.io.sort.factor</name>
               <value>100</value>
        </property>
</configuration>

#masters
carcloud81

#slaves
carcloud82
carcloud83

#yarn-site.xml
<configuration>
        <!-- Site specific YARN configuration properties -->
        <property>
               <name>yarn.resourcemanager.address</name>
               <value>carcloud81:8032</value>
        </property>
        <property>
               <name>yarn.resourcemanager.scheduler.address</name>
               <value>carcloud81:8030</value>
        </property>
        <property>
               <name>yarn.resourcemanager.resource-tracker.address</name>
               <value>carcloud81:8031</value>
        </property>
        <property>
               <name>yarn.resourcemanager.admin.address</name>
               <value>carcloud81:8033</value>
        </property>
        <property>
               <name>yarn.resourcemanager.webapp.address</name>
               <value>carcloud81:8088</value>
        </property>
        <property>
               <name>yarn.nodemanager.aux-services</name>
               <value>mapreduce_shuffle</value>
        </property>
        <property>
               <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
               <value>org.apache.hadoop.mapred.ShuffleHandler</value>
        </property>
        <property>
               <description>Classpath for typical applications.</description>
               <name>yarn.application.classpath</name>
               <value>$HADOOP_CONF_DIR
               ,$HADOOP_COMMON_HOME/share/hadoop/common/*
               ,$HADOOP_COMMON_HOME/share/hadoop/common/lib/*
               ,$HADOOP_HDFS_HOME/share/hadoop/hdfs/*
               ,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*
               ,$YARN_HOME/share/hadoop/yarn/*</value>
        </property>

        <!-- Configurations for NodeManager -->
        <property>
               <name>yarn.nodemanager.resource.memory-mb</name>
               <value>5632</value>
        </property>
        <property>
               <name>yarn.scheduler.minimum-allocation-mb</name>
               <value>1408</value>
        </property>
        <property>
               <name>yarn.scheduler.maximum-allocation-mb</name>
               <value>5632</value>
        </property>
</configuration>
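
The memory figures above are deliberately aligned: each NodeManager offers 5632 MB, which divides evenly into the map and reduce container sizes from mapred-site.xml, and the JVM heaps are set to roughly 80% of their containers to leave non-heap headroom:

5632 = 4 x 1408    (four map containers per node)
5632 = 2 x 2816    (two reduce containers per node)
1126 ≈ 0.8 x 1408,  2252 ≈ 0.8 x 2816    (heap ≈ 80% of container)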

2.4 Startup and shutdown



    #Sync the same software environment and Hadoop package to 82 and 83 and install them there the same way (see the sketch below).
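
        One way to mirror the installation (a sketch, assuming identical paths and accounts on all three machines):

        for host in carcloud82 carcloud83; do
            rsync -a /data/cloud/ hadoop@$host:/data/cloud/
            rsync -a /etc/profile.d/ice.sh root@$host:/etc/profile.d/
        done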
    #Initialize the NameNode on 81:
        >hdfs namenode -format
       
    #Start the services on 81:
        >start-dfs.sh
        >start-yarn.sh
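
    Note that start-dfs.sh/start-yarn.sh do not start the JobHistory server configured in mapred-site.xml; if it is wanted on 81, it has its own helper script (standard in Hadoop 2.x sbin):

        >mr-jobhistory-daemon.sh start historyserver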


    #jps on 81 should show:
         SecondaryNameNode
         NameNode
         ResourceManager
         Master    (Spark standalone master, once Spark is started)
         Worker    (Spark standalone worker, once Spark is started)
    #jps on 82 and 83 should show:
         DataNode
         NodeManager

  
    #Stop the services on 81:
       >stop-yarn.sh
       >stop-dfs.sh
       


