Operating System
Install the operating system
ubuntu-14.04.3-desktop-amd64.iso
Set the root user's password
sudo passwd root
Update the operating system
apt-get update
apt-get upgrade
Install SSH
apt-get install openssh-server
Set up passwordless SSH login
ssh-keygen -t rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
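To confirm passwordless login works, test a local connection (on some setups you may also need to tighten the key file permissions first):
chmod 600 ~/.ssh/authorized_keys
ssh localhost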
Install Java
Extract the JDK archive to the appropriate directory
jdk-7u79-linux-x64.tar.gz
Edit the environment variables
nano ~/.bashrc
------------------------------------------------------
export JAVA_HOME=/apps/java/jdk1.7.0_79
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$JAVA_HOME/bin:$PATH
------------------------------------------------------
Reload the environment variables and test the installation
source ~/.bashrc
java -version
Install Python Components
Install pip
apt-get install python-pip
Install numpy
apt-get install python-numpy
Install scipy
apt-get install python-scipy
Install matplotlib
apt-get install python-matplotlib
Install BeautifulSoup
apt-get install python-bs4
Install feedparser
apt-get install python-feedparser
Install Scikit-Learn
Install scikit-learn
pip install -U scikit-learn
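A quick sanity check that all of the Python components import correctly (Python 2, the default on Ubuntu 14.04; the printed version will vary):
python -c "import numpy, scipy, matplotlib, bs4, feedparser, sklearn; print sklearn.__version__"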
Install Octave
Install octave
apt-get install octave
Install the Octave graphical interface
apt-get install qtoctave
Install Scala
Extract the Scala archive to the appropriate directory
scala-2.10.4.tgz
Edit the environment variables
nano ~/.bashrc
------------------------------------------------------
export SCALA_HOME=/apps/scala/scala-2.10.4
export PATH=$JAVA_HOME/bin:$SCALA_HOME/bin:$PATH
------------------------------------------------------
Reload the environment variables and test the installation
source ~/.bashrc
scala -version
Install MySQL
Install mysql
apt-get install mysql-server
Edit the /etc/mysql/my.cnf file
------------------------------------------------------
[mysqld]
......
# bind-address = 127.0.0.1
......
default-storage-engine = innodb
innodb_file_per_table
collation-server = utf8_general_ci
init-connect = 'SET NAMES utf8'
character-set-server = utf8
......
------------------------------------------------------
Restart the MySQL service
/etc/init.d/mysql restart
Initialize MySQL
mysql_secure_installation
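Optionally verify that the server accepts connections (you will be prompted for the root password set above):
mysql -u root -p -e "SHOW DATABASES;"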
Install Hadoop (Pseudo-Distributed)
Extract Hadoop to the appropriate directory
hadoop-2.7.1.tar.gz
Edit the environment variables
nano ~/.bashrc
------------------------------------------------------
export HADOOP_HOME=/apps/hadoop/hadoop-2.7.1
export PATH=$JAVA_HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/bin:$PATH
------------------------------------------------------
Reload the environment variables and test the installation
source ~/.bashrc
hadoop version
Go to the etc/hadoop folder under the Hadoop installation directory, edit the hadoop-env.sh file, set the JAVA_HOME variable explicitly, and save
------------------------------------------------------
# The java implementation to use.
export JAVA_HOME=/apps/java/jdk1.7.0_79
------------------------------------------------------
Edit the core-site.xml file
------------------------------------------------------
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/apps_datas/hadoop/tmp</value>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>
------------------------------------------------------
Edit the hdfs-site.xml file
------------------------------------------------------
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.name.dir</name>
<value>file:/apps_datas/hadoop/hdfs/name</value>
</property>
<property>
<name>dfs.data.dir</name>
<value>file:/apps_datas/hadoop/hdfs/data</value>
</property>
</configuration>
------------------------------------------------------
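Hadoop will normally create these local paths itself during formatting and startup, but pre-creating them avoids permission surprises (paths as configured above; adjust ownership to the user running Hadoop):
mkdir -p /apps_datas/hadoop/tmp /apps_datas/hadoop/hdfs/name /apps_datas/hadoop/hdfs/data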
Copy the mapred-site.xml.template file and save it as mapred-site.xml, then edit mapred-site.xml
------------------------------------------------------
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>localhost:9001</value>
</property>
</configuration>
------------------------------------------------------
Once configuration is complete, format the NameNode
hdfs namenode -format
Start Hadoop
/apps/hadoop/hadoop-2.7.1/sbin/start-all.sh
Check that the Hadoop services started correctly
jps
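For a pseudo-distributed setup started with start-all.sh, the listing should look roughly like this (PIDs will differ):
------------------------------------------------------
xxxxx NameNode
xxxxx DataNode
xxxxx SecondaryNameNode
xxxxx ResourceManager
xxxxx NodeManager
xxxxx Jps
------------------------------------------------------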
Open a browser to check the Hadoop status
http://localhost:50070
Run an example on the pseudo-distributed cluster to verify the deployment
hdfs dfs -mkdir -p /user/hadoop/input
hdfs dfs -put /apps/hadoop/hadoop-2.7.1/etc/hadoop/*.xml input
hadoop jar /apps/hadoop/hadoop-2.7.1/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar grep input output 'dfs[a-z.]+'
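To inspect the job output directly:
hdfs dfs -cat output/*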
If the above commands run successfully and the corresponding files are visible in HDFS, the pseudo-distributed Hadoop cluster has been deployed successfully
Clean up the test files
hdfs dfs -rm -r /user/hadoop/output
hdfs dfs -rm -r /user/hadoop/input
Install Hive
Log in to MySQL
mysql -u root -p
Create the hive database
create database hive;
Create the hive user and grant it access to the hive database
grant all privileges on hive.* to 'hive'@'%' identified by 'hive';
Exit MySQL
exit;
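Optionally confirm the new account can log in (password hive, as granted above; if a local login is refused, you may also need a grant for 'hive'@'localhost'):
mysql -u hive -p -e "SHOW DATABASES;"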
Extract Hive to the appropriate directory
apache-hive-1.2.1-bin.tar.gz
Copy the MySQL JDBC driver into the lib folder under the Hive installation directory
cp mysql-connector-java-5.1.36-bin.jar /apps/hadoop/apache-hive-1.2.1-bin/lib/
Edit the environment variables
nano ~/.bashrc
------------------------------------------------------
export HIVE_HOME=/apps/hadoop/apache-hive-1.2.1-bin
export PATH=$JAVA_HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/bin:$HIVE_HOME/bin:$PATH
------------------------------------------------------
Reload the environment variables
source ~/.bashrc
Go to the conf folder under the Hive installation directory
cd /apps/hadoop/apache-hive-1.2.1-bin/conf
Copy hive-env.sh.template and save it as hive-env.sh
cp hive-env.sh.template hive-env.sh
Edit the hive-env.sh file
nano hive-env.sh
Set both the HADOOP_HOME and HIVE_CONF_DIR values
------------------------------------------------------
......
HADOOP_HOME=/apps/hadoop/hadoop-2.7.1
......
export HIVE_CONF_DIR=/apps/hadoop/apache-hive-1.2.1-bin/conf
------------------------------------------------------
Copy hive-default.xml.template and save it as hive-site.xml
cp hive-default.xml.template hive-site.xml
Edit the hive-site.xml file and modify the following configuration items
nano hive-site.xml
------------------------------------------------------
......
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>hive</value>
</property>
<property>
<name>hive.exec.local.scratchdir</name>
<value>/apps_datas/hive/iotmp/user</value>
<description>Local scratch space for Hive jobs</description>
</property>
<property>
<name>hive.downloaded.resources.dir</name>
<value>/apps_datas/hive/iotmp/resources</value>
<description>Temporary local directory for added resources in the remote file system.</description>
</property>
<property>
<name>hive.querylog.location</name>
<value>/apps_datas/hive/iotmp/user</value>
<description>Location of Hive run time structured log file</description>
</property>
<property>
<name>hive.server2.logging.operation.log.location</name>
<value>/apps_datas/hive/iotmp/user/operation_logs</value>
<description>Top level directory where operation logs are stored if logging functionality is enabled</description>
</property>
......
------------------------------------------------------
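The scratch directories configured above are not always created automatically, so it is safest to create them up front:
mkdir -p /apps_datas/hive/iotmp/user/operation_logs /apps_datas/hive/iotmp/resources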
Start HDFS (if it is not already running)
/apps/hadoop/hadoop-2.7.1/sbin/start-dfs.sh
Run Hive
hive
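A minimal smoke test, assuming the metastore settings above are correct (the first run should create the metastore tables in the MySQL hive database):
hive -e "show databases;"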
Install Spark (Standalone)
Extract Spark to the appropriate directory
spark-1.4.1-bin-hadoop2.6.tgz
Edit the environment variables
nano ~/.bashrc
------------------------------------------------------
export SPARK_HOME=/apps/spark/spark-1.4.1-bin-hadoop2.6
export PATH=$JAVA_HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/bin:$HIVE_HOME/bin:$SPARK_HOME/bin:$PATH
------------------------------------------------------
Reload the environment variables
source ~/.bashrc
Go to the conf folder under the Spark installation directory, copy the spark-env.sh.template file, and save it as spark-env.sh
cp spark-env.sh.template spark-env.sh
Edit the spark-env.sh file
nano spark-env.sh
------------------------------------------------------
export JAVA_HOME=/apps/java/jdk1.7.0_79
export SCALA_HOME=/apps/scala/scala-2.10.4
export HADOOP_CONF_DIR=/apps/hadoop/hadoop-2.7.1/etc/hadoop
export SPARK_MASTER_IP=localhost
export SPARK_MASTER_PORT=7077
export SPARK_WORKER_CORES=2
export SPARK_WORKER_INSTANCES=1
export SPARK_WORKER_MEMORY=2g
------------------------------------------------------
Copy the slaves.template file and save it as slaves
cp slaves.template slaves
Start Spark
/apps/spark/spark-1.4.1-bin-hadoop2.6/sbin/start-all.sh
Run jps to check that the processes started
jps
Run a bundled example
run-example org.apache.spark.examples.SparkPi
Open a browser to view the Spark master console
http://localhost:8080/
Start spark-shell
spark-shell
View jobs and other information at
http://localhost:4040
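A quick non-interactive smoke test against the standalone master (piping a one-liner into the REPL; the exact console output will vary):
echo 'println(sc.parallelize(1 to 1000).sum())' | spark-shell --master spark://localhost:7077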
Install Tachyon (Standalone)
Extract Tachyon to the appropriate directory
tachyon-0.7.0-hadoop2.4-bin.tar.gz
Edit the environment variables
nano ~/.bashrc
------------------------------------------------------
export TACHYON_HOME=/apps/tachyon/tachyon-0.7.0
export PATH=$JAVA_HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/bin:$SPARK_HOME/bin:$TACHYON_HOME/bin:$PATH
------------------------------------------------------
Reload the environment variables
source ~/.bashrc
Go to the conf folder under the Tachyon installation directory, copy the tachyon-env.sh.template file, and save it as tachyon-env.sh
cp tachyon-env.sh.template tachyon-env.sh
Edit the tachyon-env.sh file
nano tachyon-env.sh
------------------------------------------------------
export JAVA_HOME=/apps/java/jdk1.7.0_79
export TACHYON_MASTER_ADDRESS=localhost
# export TACHYON_UNDERFS_ADDRESS=$TACHYON_HOME/underFSStorage
export TACHYON_UNDERFS_ADDRESS=hdfs://localhost:9000/tachyon
export TACHYON_WORKER_MEMORY_SIZE=2GB
export TACHYON_UNDERFS_HDFS_IMPL=org.apache.hadoop.hdfs.DistributedFileSystem
export TACHYON_WORKER_MAX_WORKER_THREADS=2048
export TACHYON_MASTER_MAX_WORKER_THREADS=2048
------------------------------------------------------
Start Hadoop (if it is not already running)
/apps/hadoop/hadoop-2.7.1/sbin/start-all.sh
Create a tachyon folder in HDFS
hdfs dfs -mkdir /tachyon
Format Tachyon
tachyon format
Start Tachyon
tachyon-start.sh local
Open a browser and visit the address below to check that Tachyon started successfully
http://localhost:19999/
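The Tachyon shell offers a quick command-line check as well (Tachyon 0.7 ships a tfs subcommand):
tachyon tfs ls /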
At this point Tachyon and Hadoop are working together. Next, integrate Tachyon with Spark. First, stop Tachyon and Hadoop
tachyon-stop.sh
/apps/hadoop/hadoop-2.7.1/sbin/stop-all.sh
Go to the etc/hadoop directory under the Hadoop installation directory and edit the core-site.xml file, adding the following
nano /apps/hadoop/hadoop-2.7.1/etc/hadoop/core-site.xml
------------------------------------------------------
<configuration>
<property>
<name>fs.tachyon.impl</name>
<value>tachyon.hadoop.TFS</value>
</property>
</configuration>
------------------------------------------------------
Start Hadoop
/apps/hadoop/hadoop-2.7.1/sbin/start-dfs.sh
Start Tachyon
tachyon-start.sh local
Start Spark
/apps/spark/spark-1.4.1-bin-hadoop2.6/sbin/start-all.sh
Start spark-shell
spark-shell
Save a file from HDFS into Tachyon
val s = sc.textFile("hdfs://localhost:9000/user/hadoop/derby.log")
s.saveAsTextFile("tachyon://localhost:19998/test")
val rdd = sc.textFile("tachyon://localhost:19998/test")
rdd.count()
If no exceptions occurred during the steps above, Tachyon has been deployed successfully
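As a final check, the test output should be visible through the Tachyon shell (and, depending on the configured write type, eventually in the HDFS under-filesystem as well):
tachyon tfs ls /test
hdfs dfs -ls /tachyon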