基於conda環境的Scrapy Python爬蟲簡易管理腳本

#!/bin/bash
# NOTE(review): a shebang is only honoured when it is the very first line of
# the file; with any text above it the script has to be run as `bash <file>`.

# ---- Configuration -------------------------------------------------------
CONDA_PATH="/root/anaconda2" # conda installation directory
CONDA_VENV_NAME="douyin" # name of the conda virtual environment
PROJECT_NAME="douyin" # project name; arbitrary, used to build the PID/log file names
PROJECT_PATH="/data/douyin/douyinChallenge/douyinChallenge" # scrapy project path
SPIDER_NAME="douyin_challenge" # spider name
PID_FILE="${PROJECT_PATH}/logs/${PROJECT_NAME}.pid" # PID file path
LOG_FILE="${PROJECT_PATH}/logs/${PROJECT_NAME}$(date +%Y-%m-%d).log" # log file path (one per day)

# ---- conda initialisation ------------------------------------------------
# A conda virtual environment is used, so the shell must be initialised for
# conda before `conda activate` can work.
# The conda path is double-quoted on purpose: quoting starts afresh inside
# $(...), so single quotes there would pass the literal text
# '${CONDA_PATH}/bin/conda' as the command name and the hook would never run.
if __conda_setup="$("${CONDA_PATH}/bin/conda" 'shell.bash' 'hook' 2> /dev/null)"; then
    eval "$__conda_setup"
elif [ -f "${CONDA_PATH}/etc/profile.d/conda.sh" ]; then
    # Fallback 1: source conda's profile script.
    . "${CONDA_PATH}/etc/profile.d/conda.sh"
else
    # Fallback 2: at least put conda's bin directory on PATH.
    export PATH="${CONDA_PATH}/bin:$PATH"
fi
unset __conda_setup
source ~/.bashrc

#######################################
# Launch the crawler processes in the background.
# Refuses to start while PID_FILE still lists any PID.
# Globals:   PID_FILE, LOG_FILE, PROJECT_PATH, CONDA_VENV_NAME,
#            SPIDER_NAME (all read)
# Arguments: $2 - optional number of processes to launch, a positive
#                 integer (default 4). It is the SECOND positional parameter
#                 of this function, so the caller has to forward its own
#                 argument list (e.g. `start "$@"`) for it to be seen.
# Outputs:   progress messages on stdout, errors on stderr; appends one PID
#            per launched process to PID_FILE; crawler output goes to LOG_FILE
# Returns:   exits the script with status 1 on any failure
#######################################
start()
{
    local recorded_pids process_num pid i

    if [ -f "${PID_FILE}" ]; then
        recorded_pids=$(cat "${PID_FILE}")
        if [ -n "${recorded_pids}" ]; then
            echo "maybe process is runing,please stop it first"
            # Refusing to start is a failure; a bare `exit` would report 0.
            exit 1
        fi
    fi

    # Validate the count up front: a non-numeric value would otherwise be
    # evaluated as an (unset) variable name in the arithmetic loop and
    # silently launch nothing.
    process_num=${2:-4}
    case "${process_num}" in
        '' | *[!0-9]* | 0*)
            echo "invalid process number: ${process_num}" >&2
            exit 1
            ;;
    esac

    echo "Starting"
    if ! conda activate "${CONDA_VENV_NAME}"; then
        echo "cannot activate conda environment ${CONDA_VENV_NAME}" >&2
        exit 1
    fi
    # Never launch the crawler from the wrong directory.
    if ! cd "${PROJECT_PATH}"; then
        echo "cannot enter project directory ${PROJECT_PATH}" >&2
        exit 1
    fi
    # The PID and log files live in a directory that may not exist yet.
    if ! mkdir -p -- "$(dirname -- "${PID_FILE}")" "$(dirname -- "${LOG_FILE}")"; then
        echo "cannot create log directory" >&2
        exit 1
    fi

    for ((i = 0; i < process_num; i++)); do
        nohup scrapy crawl "${SPIDER_NAME}" >>"${LOG_FILE}" 2>&1 &
        pid=$!
        echo "${pid}" >>"${PID_FILE}"
        echo "Started"
    done
}

#######################################
# Stop the crawler processes listed in the PID file.
# Sends signal 2 (SIGINT) to every recorded PID, then empties the PID file.
# Does nothing when the PID file is missing or lists no PID.
# Globals:   PID_FILE (read, truncated)
# Arguments: none
# Outputs:   "stop success" on stdout; warnings about invalid or
#            not-running entries on stderr
# Returns:   0
#######################################
stop()
{
    local -a pids=()
    local pid

    if [ ! -f "${PID_FILE}" ]; then
        return 0
    fi

    # Split the whole file on whitespace into an array without globbing.
    # `read` returns non-zero at end of file; the array is filled regardless.
    read -r -d '' -a pids <"${PID_FILE}"
    if [ "${#pids[@]}" -eq 0 ]; then
        return 0
    fi

    for pid in "${pids[@]}"; do
        # Only positive decimal PIDs may reach kill: 0 or a negative number
        # would address whole process groups instead of a single process.
        case "${pid}" in
            *[!0-9]* | 0*)
                echo "skip invalid pid entry: ${pid}" >&2
                continue
                ;;
        esac
        if ! kill -2 "${pid}" 2>/dev/null; then
            echo "process ${pid} is not running" >&2
        fi
    done

    : >"${PID_FILE}"
    echo "stop success"
}

#######################################
# Check whether the process with the given PID is alive.
# Globals:   SPIDER_NAME (read, only for the log message)
# Arguments: $1 - PID to check
# Outputs:   one timestamped status line on stdout (nothing for an empty PID)
# Returns:   0 if the process can be signalled, or if $1 is empty;
#            1 otherwise
#######################################
checkProcessStatus()
{
    local target_pid=$1

    # An empty argument is reported as success.
    if [ -z "${target_pid}" ]; then
        return 0
    fi

    # Anything that is not a positive decimal number cannot be a PID.
    case "${target_pid}" in
        *[!0-9]* | 0*)
            echo "[$(date)] ${SPIDER_NAME}: Process ${target_pid} is not exists"
            return 1
            ;;
    esac

    # Ask the kernel about exactly this PID. Grepping `ps -ef` for the number
    # also matches longer PIDs, parent PIDs and command lines containing it.
    if kill -0 "${target_pid}" >/dev/null 2>&1; then
        echo "[$(date)] ${SPIDER_NAME}: Process is alive"
        return 0
    fi

    # The PID is listed but cannot be signalled. No kill -9 is attempted:
    # it would fail for the same reason kill -0 did.
    if ps -p "${target_pid}" >/dev/null 2>&1; then
        echo "[$(date)] ${SPIDER_NAME}: Process ${target_pid} have Dead"
        return 1
    fi

    echo "[$(date)] ${SPIDER_NAME}: Process ${target_pid} is not exists"
    return 1
}

#######################################
# Print one status line for every PID recorded in the PID file.
# Globals:   PID_FILE, SPIDER_NAME (read)
# Arguments: none
# Outputs:   per-PID "Running Normal." / "Have Stopped ...." lines on stdout,
#            or a single "No Process Running." line when the PID file is
#            missing or lists no PID
# Returns:   0
#######################################
status()
{
    local -a pids=()
    local pid

    # A missing PID file is the normal "never started" state, not an error.
    if [ -f "${PID_FILE}" ]; then
        # Split the whole file on whitespace without globbing. `read`
        # returns non-zero at end of file; the array is filled regardless.
        read -r -d '' -a pids <"${PID_FILE}"
    fi

    if [ "${#pids[@]}" -eq 0 ]; then
        echo "${SPIDER_NAME} crawler: No Process Running."
        return 0
    fi

    # `pid` is local so the helper cannot overwrite the loop variable and
    # make the report show a different PID than the one that was checked.
    for pid in "${pids[@]}"; do
        if checkProcessStatus "${pid}" >/dev/null; then
            echo "${SPIDER_NAME} crawler:${pid} Running Normal."
        else
            echo "${SPIDER_NAME} crawler:${pid}  Have Stopped ...."
        fi
    done
}

#######################################
# Stop the crawler processes, then start them again.
# Arguments: all arguments are forwarded unchanged to start(), which reads
#            the optional process count from its second positional
#            parameter. Called without arguments, start() gets none.
# Outputs:   progress messages on stdout plus whatever stop/start print
#######################################
restart()
{
    echo "Stoping ... "
    stop
    echo "Staring ..."
    # Forward the argument list so a requested process count reaches start().
    start "$@"
}

# Dispatch on the sub-command given as the first script argument.
# The whole argument list is forwarded to start and restart: start() reads
# the optional process count from ITS second positional parameter, so
# calling it without arguments would always fall back to the default.
RETVAL=0
case "$1" in
    start)
        start "$@"
        RETVAL=$?
        ;;
    stop)
        stop
        RETVAL=$?
        ;;
    status)
        status
        RETVAL=$?
        ;;
    restart)
        restart "$@"
        RETVAL=$?
        ;;
    *)
        echo $"Usage: $0 {start|stop|restart|status}"
        RETVAL=1
        ;;
esac
exit "${RETVAL}"
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章