shell腳本-切換軟鏈接文件(nagios監控)

shell腳本-切換軟鏈接文件(nagios監控)

任務:需要在nagios中定義服務去檢測3個DC的狀態(1.主機狀態、2.consul cluster狀態、3.nomad cluster狀態),只要其中某個服務狀態失效,就觸發nagios eventhandler去改變dns服務器的鏈接文件,如上圖所示。

腳本:以下腳本中的服務器地址已做脫敏處理,與實際環境中使用的地址不同。

腳本1:該腳本檢測3個DC的服務狀態,根據檢測到的結果會輸出目前dns應該鏈接的文件名,nagios上會顯示該文件名。如果dns沒有鏈接到正確的文件名,nagios就會報警並觸發event-handler。

#!/bin/bash
# check_dc_status (part 1): collect the raw probe results.
#
# Probes three data centres (host reachability, Consul cluster, Nomad cluster)
# and reads which zone file the DNS server currently links to.
#
#   DC US: tier1001, tier1002
#   DC EU: tier2001, tier2002
#   DC AS: tier3001, tier3002
#
# Zone file expected for each situation:
#   all DCs healthy -> axel-geo_us_eu_as.yml (default)
#   DC-EU down      -> axel-geo_us_as.yml
#   DC-AS down      -> axel-geo_us_eu.yml
#   DC-US down      -> axel-geo_eu_as.yml
#
# Variables set for the rest of the script:
#   PING_<host>           second word of the check_ping output ("OK" when fine)
#   CON_US/CON_EU/CON_AS  0 when check_consul_cluster succeeded, else 1
#   NOM_US/NOM_EU/NOM_AS  0 when check_nomad_cluster succeeded, else 1
#   FILE                  base name of the zone file currently linked

PLUGIN_DIR=/usr/lib64/nagios/plugins

# Print the status word (second field) of check_ping for host $1.
ping_state() {
  "${PLUGIN_DIR}/check_ping" -4 -H "$1" -w 3000.0,80% -c 5000.0,100% -p 5 \
    | awk '{print $2}'
}

# Print 0 when NRPE command $2 on host $1 exits successfully, 1 otherwise.
# The plugin's own output is discarded; only its exit status matters here.
nrpe_flag() {
  if "${PLUGIN_DIR}/check_nrpe" -H "$1" -c "$2" &>/dev/null; then
    echo 0
  else
    echo 1
  fi
}

# Host reachability, two hosts per DC, via the stock check_ping plugin.
PING_1001=$(ping_state tier1001)
PING_1002=$(ping_state tier1002)
PING_2001=$(ping_state tier2001)
PING_2002=$(ping_state tier2002)
PING_3001=$(ping_state tier3001)
PING_3002=$(ping_state tier3002)

# Consul cluster state per DC, checked by a remote script through NRPE.
CON_US=$(nrpe_flag tier1001.axel.network check_consul_cluster)
CON_EU=$(nrpe_flag tier2001.axel.network check_consul_cluster)
CON_AS=$(nrpe_flag tier3001.axel.network check_consul_cluster)

# Nomad cluster state per DC, checked by a remote script through NRPE.
NOM_US=$(nrpe_flag tier1001.axel.network check_nomad_cluster)
NOM_EU=$(nrpe_flag tier2001.axel.network check_nomad_cluster)
NOM_AS=$(nrpe_flag tier3001.axel.network check_nomad_cluster)

# Ask the DNS server which file its link currently points to. A failed NRPE
# call returns an error message instead of a file name, so stop here rather
# than comparing that message against the expected file names.
if ! FILE=$("${PLUGIN_DIR}/check_nrpe" -H romeo.zencoo.com -c check_pdns_link); then
  echo "check_pdns_link failed: ${FILE}"
  exit 1
fi

# The reported link target may be a full path (e.g. /etc/pdns/<file>), while
# the comparisons later in this script use bare file names: keep the base name.
FILE=${FILE##*/}

if [[ -z "$FILE" ]]; then
  # shellcheck disable=SC2016 -- the literal text '$FILE' is the message
  echo '$FILE is NULL'
  exit 1
fi

# Collapse the raw probe results into one health flag per data centre.
#
# Globals read:
#   PING_1001 PING_1002 PING_2001 PING_2002 PING_3001 PING_3002
#       check_ping status word per host ("OK" when reachable)
#   CON_US CON_EU CON_AS   Consul flag per DC (0 = ok)
#   NOM_US NOM_EU NOM_AS   Nomad flag per DC (0 = ok)
# Globals written (0 = healthy, 1 = down):
#   PING_US PING_EU PING_AS   both hosts of the DC answered OK
#   US EU AS                  ping, Consul and Nomad are all fine for the DC
function service {
  # A DC counts as reachable only when BOTH of its hosts report OK.
  if [[ "$PING_1001" == "OK" && "$PING_1002" == "OK" ]]; then
    PING_US=0
  else
    PING_US=1
  fi
  # The second operand must be PING_2002; testing PING_2001 twice would hide
  # an outage of tier2002.
  if [[ "$PING_2001" == "OK" && "$PING_2002" == "OK" ]]; then
    PING_EU=0
  else
    PING_EU=1
  fi
  if [[ "$PING_3001" == "OK" && "$PING_3002" == "OK" ]]; then
    PING_AS=0
  else
    PING_AS=1
  fi

  # A DC is healthy only when all three of its checks are fine. The flags are
  # compared as strings so that an unset or malformed flag counts as "down".
  if [[ "$PING_US" == "0" && "$CON_US" == "0" && "$NOM_US" == "0" ]]; then
    US=0
  else
    US=1
  fi
  if [[ "$PING_EU" == "0" && "$CON_EU" == "0" && "$NOM_EU" == "0" ]]; then
    EU=0
  else
    EU=1
  fi
  if [[ "$PING_AS" == "0" && "$CON_AS" == "0" && "$NOM_AS" == "0" ]]; then
    AS=0
  else
    AS=1
  fi
}

# Derive the US / EU / AS health flags from the raw probe results.
service

# Print the zone file the DNS link should point to and leave with status 2,
# which Nagios treats as CRITICAL and which fires the event handler.
demand_switch() {
  echo "$1"
  exit 2
}

# Decide whether the link has to be switched.
#   exit 0: the link already matches the current DC health
#   exit 2: the link must change; the output is the file name to link to
if [[ "$US" == 0 && "$EU" == 0 && "$AS" == 0 ]]; then
  if [[ "$FILE" == "axel-geo_us_eu_as.yml" ]]; then
    echo "all-DC-is ok,->already axel-geo_us_eu_as.yml"
    exit 0
  fi
  demand_switch "axel-geo_us_eu_as.yml"
fi

# At least one DC is down. The first down DC (order US, EU, AS) whose fallback
# file is not linked yet decides the requested file.
# NOTE(review): with two DCs down at once the requested file can alternate
# between checks - confirm the intended behaviour for that case.
if [[ "$US" == 1 && "$FILE" != "axel-geo_eu_as.yml" ]]; then
  demand_switch "axel-geo_eu_as.yml"
fi
if [[ "$EU" == 1 && "$FILE" != "axel-geo_us_as.yml" ]]; then
  demand_switch "axel-geo_us_as.yml"
fi
if [[ "$AS" == 1 && "$FILE" != "axel-geo_us_eu.yml" ]]; then
  demand_switch "axel-geo_us_eu.yml"
fi

echo "link file is ${FILE}"
exit 0

腳本2:觸發event-handler的腳本

#!/bin/bash
# change_dns_linkfile: Nagios event handler for the check_dc_status service.
#
# Arguments:
#   $1  service state passed by Nagios (OK, CRITICAL, ...)
#   $2  service output; on CRITICAL this is the zone file name the DNS
#       servers should link to, one of:
#         axel-geo_us_eu_as.yml  all DCs healthy (default)
#         axel-geo_us_as.yml     DC-EU down
#         axel-geo_us_eu.yml     DC-AS down
#         axel-geo_eu_as.yml     DC-US down
#
# On CRITICAL the matching NRPE command is sent to every DNS host.
# All output of this script is appended to ${LOG}.
# Exit status: 1 when $2 is not a known file name, 0 otherwise.

WORKDIR=/usr/lib64/nagios/plugins
DATE=$(date +%Y%m%d%H%M%S)
LOG=/tmp/.dns_linkfile
DNS_HOSTS=(DNS1 DNS2)

# From here on stdout and stderr both go to the log file.
exec &>>"${LOG}"

case "$1" in
  OK)
    # The link already points to the right file: nothing to do.
    exit 0
    ;;
  CRITICAL)
    # Map the requested file name to the NRPE command that links to it.
    case "$2" in
      axel-geo_us_eu_as.yml)
        REMOTE_CMD=update_us_eu_as
        ;;
      axel-geo_us_as.yml)
        REMOTE_CMD=update_us_as
        ;;
      axel-geo_us_eu.yml)
        REMOTE_CMD=update_us_eu
        ;;
      axel-geo_eu_as.yml)
        REMOTE_CMD=update_eu_as
        ;;
      *)
        echo "${DATE}--warining,no file match"
        exit 1
        ;;
    esac

    for host in "${DNS_HOSTS[@]}"; do
      # Log the command exactly as it is run, then its exit status.
      echo "${DATE}--${WORKDIR}/check_nrpe -H ${host} -c ${REMOTE_CMD}"
      "${WORKDIR}/check_nrpe" -H "${host}" -c "${REMOTE_CMD}"
      # Recorded as-is rather than as pass/fail: the status reflects the state
      # reported by the remote command, which this script cannot interpret.
      echo "${DATE}--check_nrpe on ${host} exit status: $?"
    done
    ;;
esac
exit 0

腳本3:更改DNS服務上的鏈接文件

#!/bin/bash
# dns_file_check.sh: report or switch the zone file symlink on a DNS server.
# Called through NRPE by the check_dc_status and change_dns_linkfile scripts.
#
# Argument $1:
#   check      print the current target of ${DIR}/${LN} and exit 0
#   us_eu_as   link to axel-geo_us_eu_as.yml
#   us_as      link to axel-geo_us_as.yml
#   us_eu      link to axel-geo_us_eu.yml
#   eu_as      link to axel-geo_eu_as.yml
#
# Exit status for the switch actions: 0 when the link was already correct or
# was switched and the DNS reload succeeded; 1 on an unknown argument, a
# missing target file, a failed ln, or a failed reload.

LOG=/tmp/.dns_linkfile
DATE=$(date +%Y%m%d%H%M%S)
DIR=/etc/pdns
LN=axel-geo.yml
# Current link target; empty when ${DIR}/${LN} is missing or not a symlink.
FILE=$(readlink -- "${DIR}/${LN}")

# Append message $1 to the log and also print it on stdout, so the NRPE
# caller receives the outcome in its reply.
log() {
  printf '%s\n' "$1" | tee -a "${LOG}"
}

case "$1" in
  check)
    printf '%s\n' "$FILE"
    exit 0
    ;;
  us_eu_as)
    TAGETFILE="${DIR}/axel-geo_us_eu_as.yml"
    ;;
  us_as)
    TAGETFILE="${DIR}/axel-geo_us_as.yml"
    ;;
  us_eu)
    TAGETFILE="${DIR}/axel-geo_us_eu.yml"
    ;;
  eu_as)
    TAGETFILE="${DIR}/axel-geo_eu_as.yml"
    ;;
  *)
    # Double quotes so the offending argument actually appears in the log.
    log "${DATE}-$1 error"
    exit 1
    ;;
esac

if [[ ! -f "${TAGETFILE}" ]]; then
  log "${TAGETFILE} does not exist/${DATE}"
  exit 1
fi

if [[ "$FILE" == "$TAGETFILE" ]]; then
  log "${DATE}-Link file is correct, no need to switch"
  exit 0
fi

log "${HOSTNAME}/${DATE} ln -snf ${TAGETFILE} ${DIR}/${LN}"
if ! sudo /usr/bin/ln -snf "${TAGETFILE}" "${DIR}/${LN}"; then
  # The link is unchanged, so a reload would achieve nothing.
  log "${DATE}-ln failed"
  exit 1
fi

if sudo /bin/pdns_control reload; then
  log "${DATE}-reload dns ok"
  exit 0
fi
log "${DATE}-reload dns failed"
exit 1

nagios配置 #定義檢測服務,定義event-handler

# Service that runs the DC status check (script 1) through NRPE and attaches
# the change_dns_linkfile event handler defined below.
# Comments sit on their own lines: text after '#' inside a directive line is
# not treated as a comment and would become part of the directive value.
define service{
        use                             generic-service
        host_name                         xxx
        service_description             check_dc_status
        contact_groups                  admins,admins_jabber
        check_command                   check_nrpe_t60!check_dc_status
        event_handler                   change_dns_linkfile
        }

# Event handler command: $SERVICESTATE$ and $SERVICEOUTPUT$ become $1 and $2
# of the handler script (script 2). The output macro is quoted so that a
# multi-word service output is still passed as one argument.
define command {
        command_name    change_dns_linkfile
        command_line    $USER1$/eventhandlers/change_dns_linkfile $SERVICESTATE$ "$SERVICEOUTPUT$"
        }

puppet配置 #腳本1和腳本2會通過nrpe調用腳本3,需要定義相應的命令以及對應的參數

<%# NRPE commands for dns_file_check.sh (script 3), rendered only on the two DNS servers. -%>
<% if %w[dns1xxxx dns2xxxx].include?(@fqdn) -%>
command[check_pdns_link]=<%= @pluginsdir %>/dns_file_check.sh check
command[update_us_eu_as]=<%= @pluginsdir %>/dns_file_check.sh us_eu_as
command[update_us_eu]=<%= @pluginsdir %>/dns_file_check.sh us_eu
command[update_us_as]=<%= @pluginsdir %>/dns_file_check.sh us_as
command[update_eu_as]=<%= @pluginsdir %>/dns_file_check.sh eu_as
<% end -%>

第一次弄nagios event-handler,感覺很亂,腳本還要再繼續完善

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章