Operating System Preparation
Install the operating system
ubuntu-16.04-desktop-amd64.iso
Set the root user password
sudo passwd root
Update the operating system
sudo apt-get update
sudo apt-get upgrade
Install the packages required by the development environment
sudo apt-get install openssh-server
sudo apt-get install build-essential
sudo apt-get install libopenblas-dev liblapack-dev libatlas-dev
sudo apt-get install cmake git libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev ant
sudo apt-get install libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libjasper-dev libdc1394-22-dev
sudo apt-get install libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev protobuf-compiler
sudo apt-get install --no-install-recommends libboost-all-dev
sudo apt-get install libgflags-dev libgoogle-glog-dev liblmdb-dev
Set up passwordless SSH login to localhost
ssh-keygen -t rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
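To confirm that passwordless login works, try connecting to localhost; the first connection may ask you to accept the host key, but it should not prompt for a password:
ssh localhost
exit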
Install Java
Create the software installation directory
mkdir -p ~/devtools/Java
cd ~/devtools/Java
Deploy Java
tar -xvf jdk-8u92-linux-x64.tar.gz
nano ~/.bashrc
-------------------------------------------------------------------------------------------------------
export JAVA_HOME=~/devtools/Java/jdk1.8.0_92
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$JAVA_HOME/bin:$PATH
-------------------------------------------------------------------------------------------------------
Reload the environment variables and verify the installation
source ~/.bashrc
java -version
Install Scala
Create the software installation directory
mkdir -p ~/devtools/Scala
cd ~/devtools/Scala
Deploy Scala
tar -xvf scala-2.11.8.tgz
nano ~/.bashrc
-------------------------------------------------------------------------------------------------------
export SCALA_HOME=~/devtools/Scala/scala-2.11.8
export PATH=$SCALA_HOME/bin:$PATH
-------------------------------------------------------------------------------------------------------
Reload the environment variables and verify the installation
source ~/.bashrc
scala -version
Install Python
Install the basic Python development packages
sudo apt-get install python-pip python-dev python-numpy python-scipy python-matplotlib python-nose
Install scikit-learn
pip install -U scikit-learn
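A quick way to verify the installation is to import the package and print its version:
python -c "import sklearn; print(sklearn.__version__)"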
Install Theano
pip install --upgrade nose_parameterized
pip install --upgrade Theano
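As a quick check, Theano should import cleanly and report its version:
python -c "import theano; print(theano.__version__)"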
Install TensorFlow
export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp27-none-linux_x86_64.whl
pip install --upgrade $TF_BINARY_URL
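To verify that the wheel installed correctly, import TensorFlow and print its version (it should report 0.9.0 for this wheel):
python -c "import tensorflow as tf; print(tf.__version__)"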
Install OpenCV
Download the source archive and extract it into a temporary directory
unzip opencv-2.4.13.zip
cd opencv-2.4.13
Create a build folder to hold the OpenCV build files
mkdir build
cd build
Configure the OpenCV build
cmake -D CMAKE_BUILD_TYPE=Release -D BUILD_PYTHON_SUPPORT=ON -D BUILD_NEW_PYTHON_SUPPORT=ON -D CMAKE_INSTALL_PREFIX=~/devtools/OpenCV/opencv2_4_13/ -D BUILD_SHARED_LIBS=OFF ..
Compile and install
make -j$(($(nproc) + 1))
make install
Integrate OpenCV with Python: copy cv2.so from lib/python2.7/dist-packages under the OpenCV install directory into Python's dist-packages directory
sudo cp ~/devtools/OpenCV/opencv2_4_13/lib/python2.7/dist-packages/cv2.so /usr/local/lib/python2.7/dist-packages/
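To check that Python can load the copied bindings, import cv2 and print its version (it should report 2.4.13):
python -c "import cv2; print(cv2.__version__)"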
Install Caffe
Enter the software installation directory
cd ~/devtools
Extract the installation archive
unzip caffe-master.zip
mv caffe-master caffe
Enter the caffe directory and edit the Makefile.config file; uncomment or adjust the lines shown below
cd caffe
cp Makefile.config.example Makefile.config
nano Makefile.config
-------------------------------------------------------------------------------------------------------
CPU_ONLY := 1
BLAS := open
INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial
LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu/hdf5/serial
-------------------------------------------------------------------------------------------------------
Install the Python dependencies
sudo pip install -r python/requirements.txt
Build Caffe
make all -j $(($(nproc) + 1))
make test -j $(($(nproc) + 1))
make runtest -j $(($(nproc) + 1))
Build PyCaffe (Caffe's Python interface)
make pycaffe -j $(($(nproc) + 1))
Add Caffe to the environment variables
nano ~/.bashrc
-------------------------------------------------------------------------------------------------------
export CAFFE_HOME=~/devtools/caffe
export PYTHONPATH=$CAFFE_HOME/python:$PYTHONPATH
-------------------------------------------------------------------------------------------------------
source ~/.bashrc
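Assuming the new PYTHONPATH has been loaded, the PyCaffe module should now be importable:
python -c "import caffe"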
Deploy Hadoop in Pseudo-Distributed Mode
Create the software installation directory
mkdir -p ~/devtools/Hadoop
cd ~/devtools/Hadoop
Deploy Hadoop
tar -xvf hadoop-2.7.2.tar.gz
nano ~/.bashrc
-------------------------------------------------------------------------------------------------------
export HADOOP_HOME=~/devtools/Hadoop/hadoop-2.7.2
export PATH=$HADOOP_HOME/bin:$PATH
-------------------------------------------------------------------------------------------------------
source ~/.bashrc
hadoop version
Enter the etc/hadoop folder under the Hadoop installation directory, edit hadoop-env.sh, and set the JAVA_HOME variable
cd ~/devtools/Hadoop/hadoop-2.7.2/etc/hadoop
nano hadoop-env.sh
-------------------------------------------------------------------------------------------------------
# The java implementation to use.
# export JAVA_HOME=${JAVA_HOME}
export JAVA_HOME=~/devtools/Java/jdk1.8.0_92
-------------------------------------------------------------------------------------------------------
Edit the core-site.xml file. Note: Hadoop does not expand ~ in configuration values, so in this file and in hdfs-site.xml below, replace ~ with the absolute path of your home directory
nano core-site.xml
-----------------------------------------------------------------------------------------------------
<configuration>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:~/devtools/workspace/hadoop/tmp</value>
    </property>
    <property>
        <name>fs.default.name</name>
        <value>hdfs://localhost:9000</value>
    </property>
</configuration>
-----------------------------------------------------------------------------------------------------
Edit the hdfs-site.xml file
nano hdfs-site.xml
-------------------------------------------------------------------------------------------------------
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.name.dir</name>
        <value>file:~/devtools/workspace/hadoop/hdfs/name</value>
    </property>
    <property>
        <name>dfs.data.dir</name>
        <value>file:~/devtools/workspace/hadoop/hdfs/data</value>
    </property>
</configuration>
-------------------------------------------------------------------------------------------------------
Copy mapred-site.xml.template to mapred-site.xml, then edit mapred-site.xml
cp mapred-site.xml.template mapred-site.xml
nano mapred-site.xml
-------------------------------------------------------------------------------------------------------
<configuration>
    <property>
        <name>mapred.job.tracker</name>
        <value>localhost:9001</value>
    </property>
</configuration>
-------------------------------------------------------------------------------------------------------
After the configuration is complete, format the NameNode
hdfs namenode -format
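If the format succeeds, the output should end with a message stating that the storage directory has been successfully formatted.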
Start Hadoop
~/devtools/Hadoop/hadoop-2.7.2/sbin/start-dfs.sh
Check whether the Hadoop services started correctly
jps
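With only HDFS started, jps should typically list the following processes (PIDs will differ):
NameNode
DataNode
SecondaryNameNode
Jps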
Open a browser and check the HDFS status
http://localhost:50070
Run an example job on the pseudo-distributed cluster to verify the deployment
hdfs dfs -mkdir -p /user/hadoop/input
hdfs dfs -put ~/devtools/Hadoop/hadoop-2.7.2/etc/hadoop/*.xml /user/hadoop/input
hadoop jar ~/devtools/Hadoop/hadoop-2.7.2/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar grep /user/hadoop/input /user/hadoop/output 'dfs[a-z.]+'
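Before cleaning up, the result of the grep example can be inspected directly from HDFS:
hdfs dfs -cat /user/hadoop/output/*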
Clean up the test files
hdfs dfs -rm -r /user/hadoop
Deploy Spark in Standalone Mode
Create the software installation directory
mkdir -p ~/devtools/Spark
cd ~/devtools/Spark
Deploy Spark
tar -xvf spark-1.6.2-bin-hadoop2.6.tgz
nano ~/.bashrc
-------------------------------------------------------------------------------------------------------
export SPARK_HOME=~/devtools/Spark/spark-1.6.2-bin-hadoop2.6
export PATH=$SPARK_HOME/bin:$PATH
-------------------------------------------------------------------------------------------------------
source ~/.bashrc
Enter the conf folder under the Spark installation directory, copy spark-env.sh.template to spark-env.sh, then edit spark-env.sh
cd ~/devtools/Spark/spark-1.6.2-bin-hadoop2.6/conf
cp spark-env.sh.template spark-env.sh
nano spark-env.sh
-------------------------------------------------------------------------------------------------------
export JAVA_HOME=~/devtools/Java/jdk1.8.0_92
export SCALA_HOME=~/devtools/Scala/scala-2.11.8
export HADOOP_CONF_DIR=~/devtools/Hadoop/hadoop-2.7.2/etc/hadoop
export SPARK_MASTER_IP=localhost
export SPARK_MASTER_PORT=7077
export SPARK_WORKER_CORES=2
export SPARK_WORKER_INSTANCES=1
export SPARK_WORKER_MEMORY=2g
-------------------------------------------------------------------------------------------------------
Copy slaves.template to slaves
cp slaves.template slaves
Start Spark
~/devtools/Spark/spark-1.6.2-bin-hadoop2.6/sbin/start-all.sh
Run jps to check whether the processes started
jps
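In addition to any Hadoop processes, jps should now also list the Spark standalone daemons:
Master
Worker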
Run the bundled example
run-example org.apache.spark.examples.SparkPi
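The example prints a result line among the log output; it can be filtered out of the console output, for instance:
run-example org.apache.spark.examples.SparkPi 2>&1 | grep "Pi is roughly"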
Open a browser and check the Spark master web UI
http://localhost:8080
Run spark-shell
spark-shell
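Inside the shell, Spark 1.6 automatically provides a SparkContext as sc and a SQLContext as sqlContext; as a quick smoke test, evaluating sc.parallelize(1 to 100).sum() should return 5050.0.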
View jobs and other information in the application web UI
http://localhost:4040