Setting Up a Scikit-Learn, TensorFlow, Theano, Caffe, OpenCV, Hadoop, and Spark Development Environment on Ubuntu

Operating System Preparation

Install the operating system from this image

ubuntu-16.04-desktop-amd64.iso

Set the root user's password

sudo passwd root

Update the operating system

sudo apt-get update

sudo apt-get upgrade

Install the packages the development environment depends on

sudo apt-get install openssh-server

sudo apt-get install build-essential

sudo apt-get install libopenblas-dev liblapack-dev libatlas-dev

sudo apt-get install cmake git libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev ant

sudo apt-get install libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libjasper-dev libdc1394-22-dev

sudo apt-get install libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev protobuf-compiler

sudo apt-get install --no-install-recommends libboost-all-dev

sudo apt-get install libgflags-dev libgoogle-glog-dev liblmdb-dev

Set up passwordless SSH login to the local machine

ssh-keygen -t rsa

cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
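
The key file's permissions may need tightening before sshd will accept it; afterwards, a login to localhost should succeed without a password prompt:

chmod 600 ~/.ssh/authorized_keys

ssh localhost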


Install Java

Create the installation directory

mkdir -p ~/devtools/Java

cd ~/devtools/Java

Deploy Java

tar -xvf jdk-8u92-linux-x64.tar.gz

nano ~/.bashrc

-------------------------------------------------------------------------------------------------------

export JAVA_HOME=~/devtools/Java/jdk1.8.0_92

export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar

export PATH=$JAVA_HOME/bin:$PATH

-------------------------------------------------------------------------------------------------------

Reload the environment variables and test the installation

source ~/.bashrc

java -version


Install Scala

Create the installation directory

mkdir -p ~/devtools/Scala

cd ~/devtools/Scala

Deploy Scala

tar -xvf scala-2.11.8.tgz

nano ~/.bashrc

-------------------------------------------------------------------------------------------------------

export SCALA_HOME=~/devtools/Scala/scala-2.11.8

export PATH=$SCALA_HOME/bin:$PATH

-------------------------------------------------------------------------------------------------------

Reload the environment variables and test the installation

source ~/.bashrc

scala -version


Install Python

Install the basic Python development packages

sudo apt-get install python-pip python-dev python-numpy python-scipy python-matplotlib python-nose
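
As an optional sanity check, the core packages can be imported from the command line:

python -c "import numpy, scipy; print(numpy.__version__)"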

Install Scikit-Learn

pip install -U scikit-learn
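
To verify the install, print the package version:

python -c "import sklearn; print(sklearn.__version__)"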

Install Theano

pip install --upgrade nose_parameterized

pip install --upgrade Theano
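
Theano can be verified the same way:

python -c "import theano; print(theano.__version__)"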

Install TensorFlow

export TF_BINARY_URL=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.9.0-cp27-none-linux_x86_64.whl

pip install --upgrade $TF_BINARY_URL
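
A minimal smoke test, running a constant op through a session (using the 0.9-era graph API):

python -c "import tensorflow as tf; print(tf.Session().run(tf.constant('Hello, TensorFlow')))"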


Install OpenCV

Download the source archive and extract it in a temporary directory

unzip opencv-2.4.13.zip

cd opencv-2.4.13

Create a build directory to hold OpenCV's build files

mkdir build

cd build

Configure the OpenCV build

cmake -D CMAKE_BUILD_TYPE=Release -D BUILD_PYTHON_SUPPORT=ON -D BUILD_NEW_PYTHON_SUPPORT=ON -D CMAKE_INSTALL_PREFIX=~/devtools/OpenCV/opencv2_4_13/ -DBUILD_SHARED_LIBS=OFF ..

Compile and install

make -j$(($(nproc) + 1))

make install

Integrate OpenCV with Python: copy cv2.so from the OpenCV install prefix into Python's dist-packages directory. Depending on how cmake detected Python, the module may sit under lib/python2.7/site-packages or lib/python2.7/dist-packages inside the install prefix; adjust the source path to match.

sudo cp ~/devtools/OpenCV/opencv2_4_13/lib/python2.7/dist-packages/cv2.so /usr/local/lib/python2.7/dist-packages/
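
If the copy succeeded, the module should import and report its version:

python -c "import cv2; print(cv2.__version__)"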


Install Caffe

Enter the installation directory

cd ~/devtools

Extract the source archive

unzip caffe-master.zip

mv caffe-master caffe

Enter the caffe directory and edit Makefile.config so the following lines are set (CPU_ONLY ships commented out in the example file and must be uncommented; the hdf5 serial paths are appended to the stock INCLUDE_DIRS/LIBRARY_DIRS)

cd caffe

cp Makefile.config.example Makefile.config

nano Makefile.config

-------------------------------------------------------------------------------------------------------

CPU_ONLY := 1

BLAS := open

INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial

LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu/hdf5/serial

-------------------------------------------------------------------------------------------------------

Install the Python dependencies

sudo pip install -r python/requirements.txt

Compile Caffe

make all -j $(($(nproc) + 1))

make test -j $(($(nproc) + 1))

make runtest -j $(($(nproc) + 1))

Build PyCaffe (Caffe's Python interface)

make pycaffe -j $(($(nproc) + 1))

Add Caffe to the environment variables

nano ~/.bashrc

-------------------------------------------------------------------------------------------------------

export CAFFE_HOME=~/devtools/caffe

export PYTHONPATH=$CAFFE_HOME/python:$PYTHONPATH

-------------------------------------------------------------------------------------------------------

source ~/.bashrc
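
A bare import is enough to confirm PyCaffe is on the path:

python -c "import caffe"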


Deploy Hadoop in Pseudo-Distributed Mode

Create the installation directory

mkdir -p ~/devtools/Hadoop

cd ~/devtools/Hadoop

Deploy Hadoop

tar -xvf hadoop-2.7.2.tar.gz

nano ~/.bashrc

-------------------------------------------------------------------------------------------------------

export HADOOP_HOME=~/devtools/Hadoop/hadoop-2.7.2

export PATH=$HADOOP_HOME/bin:$PATH

-------------------------------------------------------------------------------------------------------

source ~/.bashrc

hadoop version

Enter the etc/hadoop folder under the Hadoop installation directory and edit hadoop-env.sh to set the JAVA_HOME variable

cd ~/devtools/Hadoop/hadoop-2.7.2/etc/hadoop

nano hadoop-env.sh

-------------------------------------------------------------------------------------------------------

# The java implementation to use.

# export JAVA_HOME=${JAVA_HOME}
export JAVA_HOME=~/devtools/Java/jdk1.8.0_92

-------------------------------------------------------------------------------------------------------

Edit core-site.xml. Note that Hadoop does not expand ~ in configuration values, so write out the absolute path to your home directory (shown below as /home/<user>)

nano core-site.xml

-----------------------------------------------------------------------------------------------------

<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>file:/home/<user>/devtools/workspace/hadoop/tmp</value>
  </property>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
</configuration>

-----------------------------------------------------------------------------------------------------

Edit hdfs-site.xml, again using absolute paths

nano hdfs-site.xml

-------------------------------------------------------------------------------------------------------

<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/home/<user>/devtools/workspace/hadoop/hdfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/home/<user>/devtools/workspace/hadoop/hdfs/data</value>
  </property>
</configuration>

-------------------------------------------------------------------------------------------------------

Copy mapred-site.xml.template to mapred-site.xml, then edit mapred-site.xml. (mapred.job.tracker is a legacy Hadoop 1.x property; Hadoop 2.x ignores it, and with no YARN configured the example job below runs in local mode, which is sufficient for this test.)

cp mapred-site.xml.template mapred-site.xml

nano mapred-site.xml

-------------------------------------------------------------------------------------------------------

<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <value>localhost:9001</value>
  </property>
</configuration>

-------------------------------------------------------------------------------------------------------

Once configuration is complete, format the NameNode

hdfs namenode -format

Start Hadoop

~/devtools/Hadoop/hadoop-2.7.2/sbin/start-dfs.sh

Check that the Hadoop daemons started correctly; jps should list NameNode, DataNode, and SecondaryNameNode

jps

Open a browser to view the HDFS status

http://localhost:50070

Run an example job on the pseudo-distributed cluster to verify the deployment

hdfs dfs -mkdir -p /user/hadoop/input

hdfs dfs -put ~/devtools/Hadoop/hadoop-2.7.2/etc/hadoop/*.xml /user/hadoop/input

hadoop jar ~/devtools/Hadoop/hadoop-2.7.2/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar grep /user/hadoop/input /user/hadoop/output 'dfs[a-z.]+'
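
Once the job finishes, the matched lines can be inspected directly from HDFS:

hdfs dfs -cat /user/hadoop/output/*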

Clean up the test files

hdfs dfs -rm -r /user/hadoop


Deploy a Single-Machine Spark Environment

Create the installation directory

mkdir -p ~/devtools/Spark

cd ~/devtools/Spark

Deploy Spark

tar -xvf spark-1.6.2-bin-hadoop2.6.tgz

nano ~/.bashrc

-------------------------------------------------------------------------------------------------------

export SPARK_HOME=~/devtools/Spark/spark-1.6.2-bin-hadoop2.6

export PATH=$SPARK_HOME/bin:$PATH

-------------------------------------------------------------------------------------------------------

source ~/.bashrc

Enter the conf folder under the Spark installation directory, copy spark-env.sh.template to spark-env.sh, and edit spark-env.sh

cd ~/devtools/Spark/spark-1.6.2-bin-hadoop2.6/conf

cp spark-env.sh.template spark-env.sh

nano spark-env.sh

-------------------------------------------------------------------------------------------------------

export JAVA_HOME=~/devtools/Java/jdk1.8.0_92

export SCALA_HOME=~/devtools/Scala/scala-2.11.8

export HADOOP_CONF_DIR=~/devtools/Hadoop/hadoop-2.7.2/etc/hadoop

export SPARK_MASTER_IP=localhost

export SPARK_MASTER_PORT=7077

export SPARK_WORKER_CORES=2

export SPARK_WORKER_INSTANCES=1

export SPARK_WORKER_MEMORY=2g

-------------------------------------------------------------------------------------------------------

Copy slaves.template to slaves

cp slaves.template slaves

Start Spark

~/devtools/Spark/spark-1.6.2-bin-hadoop2.6/sbin/start-all.sh

Run jps to check that the processes started; Master and Worker should now appear alongside the Hadoop daemons

jps

Run the bundled example

run-example org.apache.spark.examples.SparkPi
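
run-example runs against a local master by default; to confirm the standalone master also accepts jobs, the bundled Python Pi example can be submitted to it (the path assumes the Spark 1.6.2 distribution layout):

spark-submit --master spark://localhost:7077 $SPARK_HOME/examples/src/main/python/pi.py 10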

Open a browser to view the Spark web console

http://localhost:8080

Run spark-shell

spark-shell

View jobs and other information in the shell's web UI

http://localhost:4040
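
When finished, the daemons can be shut down with the matching stop scripts:

~/devtools/Spark/spark-1.6.2-bin-hadoop2.6/sbin/stop-all.sh

~/devtools/Hadoop/hadoop-2.7.2/sbin/stop-dfs.sh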
