操作系統準備
安裝操作系統
ubuntu-16.04-desktop-amd64.iso
修改root用戶密碼
sudo passwd root
更新操作系統
sudo apt-get update
sudo apt-get upgrade
安裝開發環境依賴的軟件包
sudo apt-get install openssh-server
sudo apt-get install build-essential
sudo apt-get install libopenblas-dev liblapack-dev libatlas-dev
sudo apt-get install cmake git libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev ant
sudo apt-get install libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libjasper-dev libdc1394-22-dev
sudo apt-get install libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev protobuf-compiler
sudo apt-get install --no-install-recommends libboost-all-dev
sudo apt-get install libgflags-dev libgoogle-glog-dev liblmdb-dev
設置本機ssh無密鑰登陸
ssh-keygen -t rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
安裝Java
創建軟件安裝目錄
mkdir -p ~/devtools/Java
cd ~/devtools/Java
部署Java
tar -xvf jdk-8u92-linux-x64.tar.gz
nano ~/.bashrc
-------------------------------------------------------------------------------------------------------
export JAVA_HOME=~/devtools/Java/jdk1.8.0_92
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$JAVA_HOME/bin:$PATH
-------------------------------------------------------------------------------------------------------
重新加載環境變量,測試安裝
source ~/.bashrc
java -version
安裝SCALA
創建軟件安裝目錄
mkdir -p ~/devtools/Scala
cd ~/devtools/Scala
部署Scala
tar -xvf scala-2.11.8.tgz
nano ~/.bashrc
-------------------------------------------------------------------------------------------------------
export SCALA_HOME=~/devtools/Scala/scala-2.11.8
export PATH=$SCALA_HOME/bin:$PATH
-------------------------------------------------------------------------------------------------------
重新加載環境變量,測試安裝
source ~/.bashrc
scala -version
安裝Python
安裝Python基礎開發包
sudo apt-get install python-pip python-dev python-numpy python-scipy python-matplotlib python-nose
安裝Scikit-Learn
pip install -U scikit-learn
安裝Theano
pip install --upgrade nose_parameterized
pip install --upgrade Theano
安裝Tensorflow
export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp27-none-linux_x86_64.whl
pip install --upgrade $TF_BINARY_URL
安裝OpenCV
下載安裝包,並解壓縮到臨時目錄
unzip opencv-2.4.13.zip
cd opencv-2.4.13
新建build文件夾存放opencv的編譯文件
mkdir build
cd build
配置opencv文件
cmake -D CMAKE_BUILD_TYPE=Release -D BUILD_PYTHON_SUPPORT=ON -D BUILD_NEW_PYTHON_SUPPORT=ON -D CMAKE_INSTALL_PREFIX=~/devtools/OpenCV/opencv2_4_13/ -DBUILD_SHARED_LIBS=OFF ..
編譯、安裝
make -j$(($(nproc) + 1))
make install
整合OpenCV、Python:把OpenCV安裝目錄下/lib/python2.7/dist-packages/的cv2.so文件,複製到Python的dist-packages目錄下
sudo cp ~/devtools/OpenCV/opencv2_4_13/lib/python2.7/dist-packages/cv2.so /usr/local/lib/python2.7/dist-packages/
安裝Caffe
進入軟件安裝目錄
cd ~/devtools
解壓縮安裝文件
unzip caffe-master.zip
mv caffe-master caffe
進入caffe目錄,編輯Makefile.config文件
cd caffe
cp Makefile.config.example Makefile.config
nano Makefile.config
-------------------------------------------------------------------------------------------------------
CPU_ONLY := 1
BLAS := open
INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial
LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu/hdf5/serial
-------------------------------------------------------------------------------------------------------
安裝Python依賴
sudo pip install -r python/requirements.txt
編譯caffe
make all -j $(($(nproc) + 1))
make test -j $(($(nproc) + 1))
make runtest -j $(($(nproc) + 1))
構建PyCaffe(Caffe的Python接口)
make pycaffe -j $(($(nproc) + 1))
將Caffe添加到環境變量中,編輯~/.bashrc文件
nano ~/.bashrc
-------------------------------------------------------------------------------------------------------
export CAFFE_HOME=~/devtools/caffe
export PYTHONPATH=$CAFFE_HOME/python:$PYTHONPATH
-------------------------------------------------------------------------------------------------------
source ~/.bashrc
部署HADOOP僞分佈式
創建軟件安裝目錄
mkdir -p ~/devtools/Hadoop
cd ~/devtools/Hadoop
部署Hadoop
tar -xvf hadoop-2.7.2.tar.gz
nano ~/.bashrc
-------------------------------------------------------------------------------------------------------
export HADOOP_HOME=~/devtools/Hadoop/hadoop-2.7.2
export PATH=$HADOOP_HOME/bin:$PATH
-------------------------------------------------------------------------------------------------------
source ~/.bashrc
hadoop version
進入hadoop安裝目錄下etc/hadoop文件夾,編輯hadoop-env.sh文件,設置JAVA_HOME變量
cd ~/devtools/Hadoop/hadoop-2.7.2/etc/hadoop
nano hadoop-env.sh
-------------------------------------------------------------------------------------------------------
# The java implementation to use.
# export JAVA_HOME=${JAVA_HOME}
export JAVA_HOME=~/devtools/Java/jdk1.8.0_92
-------------------------------------------------------------------------------------------------------
編輯core-site.xml文件
nano core-site.xml
-----------------------------------------------------------------------------------------------------
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/home/<用戶名>/devtools/workspace/hadoop/tmp</value>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>
-----------------------------------------------------------------------------------------------------
編輯hdfs-site.xml文件
nano hdfs-site.xml
-------------------------------------------------------------------------------------------------------
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.name.dir</name>
<value>file:/home/<用戶名>/devtools/workspace/hadoop/hdfs/name</value>
</property>
<property>
<name>dfs.data.dir</name>
<value>file:/home/<用戶名>/devtools/workspace/hadoop/hdfs/data</value>
</property>
</configuration>
-------------------------------------------------------------------------------------------------------
複製mapred-site.xml.template文件,另存爲mapred-site.xml。編輯mapred-site.xml文件
cp mapred-site.xml.template mapred-site.xml
nano mapred-site.xml
-------------------------------------------------------------------------------------------------------
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>localhost:9001</value>
</property>
</configuration>
-------------------------------------------------------------------------------------------------------
配置完成後,執行 namenode的格式化
hdfs namenode -format
啓動hadoop
~/devtools/Hadoop/hadoop-2.7.2/sbin/start-dfs.sh
檢查hadoop服務是否啓動正常
jps
打開瀏覽器,觀察HDFS情況
http://localhost:50070
運行hadoop僞分佈式集羣實例,測試部署是否成功
hdfs dfs -mkdir -p /user/hadoop/input
hdfs dfs -put ~/devtools/Hadoop/hadoop-2.7.2/etc/hadoop/*.xml /user/hadoop/input
hadoop jar ~/devtools/Hadoop/hadoop-2.7.2/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar grep /user/hadoop/input /user/hadoop/output 'dfs[a-z.]+'
清理測試文件
hdfs dfs -rm -r /user/hadoop
部署Spark單機環境
創建軟件安裝目錄
mkdir -p ~/devtools/Spark
cd ~/devtools/Spark
部署Spark
tar -xvf spark-1.6.2-bin-hadoop2.6.tgz
nano ~/.bashrc
-------------------------------------------------------------------------------------------------------
export SPARK_HOME=~/devtools/Spark/spark-1.6.2-bin-hadoop2.6
export PATH=$SPARK_HOME/bin:$PATH
-------------------------------------------------------------------------------------------------------
source ~/.bashrc
進入spark安裝目錄下conf文件夾,複製spark-env.sh.template文件,另存爲spark-env.sh。編輯spark-env.sh文件
cd ~/devtools/Spark/spark-1.6.2-bin-hadoop2.6/conf
cp spark-env.sh.template spark-env.sh
nano spark-env.sh
-------------------------------------------------------------------------------------------------------
export JAVA_HOME=~/devtools/Java/jdk1.8.0_92
export SCALA_HOME=~/devtools/Scala/scala-2.11.8
export HADOOP_CONF_DIR=~/devtools/Hadoop/hadoop-2.7.2/etc/hadoop
export SPARK_MASTER_IP=localhost
export SPARK_MASTER_PORT=7077
export SPARK_WORKER_CORES=2
export SPARK_WORKER_INSTANCES=1
export SPARK_WORKER_MEMORY=2g
-------------------------------------------------------------------------------------------------------
複製slaves.template文件,另存爲slaves文件
cp slaves.template slaves
啓動spark
~/devtools/Spark/spark-1.6.2-bin-hadoop2.6/sbin/start-all.sh
運行jps,觀察進程是否啓動
jps
運行自帶示例
run-example org.apache.spark.examples.SparkPi
打開瀏覽器,觀察spark控制檯
http://localhost:8080
運行spark-shell
spark-shell
查看jobs等信息
http://localhost:4040