運用zabbix通過OMSA組件對服務器硬件監控

最近有點惰性,做了OMSA對硬件的監控,後面有時間再做個Python web應用,把資產統計起來,做到自動收集資產。現在講講zabbix監控的詳情。

資源在github上:https://github.com/templeydy/zabbix-omsa.git ,直接使用git clone就可以下載。

下面主要記錄zabbix硬件監控的具體細節,防止以後忘記。至少現在還能記得。我把UserParameter參數的監控項貼在下面,主要對電源、風扇、內存、CPU、溫度、硬盤、raid進行監控。由於網卡是同一塊網卡,故對網卡沒有進行監控。因爲我認爲網卡壞了,服務器自然就網絡不通,自然就會進行排查,監控網卡就顯得沒有什麼必要了。

# Overall health items. Each one prints 1 for healthy and 0 otherwise, based on
# the text output of Dell OMSA `omreport`.

# Battery: for every line starting with "Status" in `omreport chassis batteries`,
# prints 1 if its last field is "Ok", else 0 (one output line per Status line).
# NOTE(review): this key has no "hardcheck." prefix, unlike most items below — confirm the template expects it.
UserParameter=hardware_battery,/opt/dell/srvadmin/sbin/omreport chassis batteries|awk '/^Status/{if($NF=="Ok") {print 1} else {print 0}}'
# Fans: prints 1 when the number of lines starting with "Index" equals the number
# of "Status" lines whose last field is "Ok"; otherwise 0.
# NOTE(review): this key also lacks the "hardcheck." prefix — confirm against the template.
UserParameter=hardware_fan_health,awk -vhardware_fan_number=`/opt/dell/srvadmin/sbin/omreport chassis fans|grep -c "^Index"` -vhardware_fan=`/opt/dell/srvadmin/sbin/omreport chassis fans|awk '/^Status/{if($NF=="Ok") count+=1}END{print count}'` 'BEGIN{if(hardware_fan_number==hardware_fan) {print 1} else {print 0}}'
# Memory: prints 1 when the last field of the line starting with "Health" in
# `omreport chassis memory` is "Ok"; otherwise 0.
UserParameter=hardcheck.hardware_memory_health,awk -vhardware_memory=`/opt/dell/srvadmin/sbin/omreport chassis memory|awk '/^Health/{print $NF}'` 'BEGIN{if(hardware_memory=="Ok") {print 1} else {print 0}}'
# NIC check (disabled): would compare the count of "Interface Name" lines with the
# count of "Connection Status" lines from `omreport chassis nics`.
#UserParameter=hardcheck.hardware_nic_health,awk -vhardware_nic_number=`/opt/dell/srvadmin/sbin/omreport chassis nics |grep -c "Interface Name"` -vhardware_nic=`/opt/dell/srvadmin/sbin/omreport chassis nics |awk '/^Connection Status/{print $NF}'|wc -l` 'BEGIN{if(hardware_nic_number==hardware_nic) {print 1} else {print 0}}'
# CPU: for every line starting with "Health" in `omreport chassis processors`,
# prints 1 if its last field is "Ok", else 0.
UserParameter=hardcheck.hardware_cpu,/opt/dell/srvadmin/sbin/omreport chassis processors|awk '/^Health/{if($NF=="Ok") {print 1} else {print 0}}'
# Power supplies: prints 1 when the number of lines containing "Index" (not
# anchored to line start here) equals the number of "Status" lines ending in "Ok".
UserParameter=hardcheck.hardware_power_health,awk -vhardware_power_number=`/opt/dell/srvadmin/sbin/omreport chassis pwrsupplies|grep -c "Index"` -vhardware_power=`/opt/dell/srvadmin/sbin/omreport chassis pwrsupplies|awk '/^Status/{if($NF=="Ok") count+=1}END{print count}'` 'BEGIN{if(hardware_power_number==hardware_power) {print 1} else {print 0}}'
# Temperature (disabled variant): used only the first "Status" line of the probe list.
#UserParameter=hardcheck.hardware_temp,/opt/dell/srvadmin/sbin/omreport chassis temps|awk '/^Status/{if($NF=="Ok") {print 1} else {print 0}}'|head -n 1
# Temperature: prints 1 when the last field of the line starting with "Main" in
# `omreport chassis temps` is "Ok"; otherwise 0.
UserParameter=hardcheck.hardware_temp,awk -vhardware_temp=`/opt/dell/srvadmin/sbin/omreport chassis temps|awk '/^Main/{print $NF}'` 'BEGIN{if(hardware_temp=="Ok") {print 1} else {print 0}}'
# Physical disks: prints 1 when the number of lines starting with "ID" equals the
# number of "Status" lines ending in "Ok". Only controller=0 is queried.
UserParameter=hardcheck.hardware_physics_health,awk -vhardware_physics_disk_number=`/opt/dell/srvadmin/sbin/omreport storage pdisk controller=0|grep -c "^ID"` -vhardware_physics_disk=`/opt/dell/srvadmin/sbin/omreport storage pdisk controller=0|awk '/^Status/{if($NF=="Ok") count+=1}END{print count}'` 'BEGIN{if(hardware_physics_disk_number==hardware_physics_disk) {print 1} else {print 0}}'
# Virtual disks: same comparison as the physical-disk item, using
# `omreport storage vdisk controller=0`.
UserParameter=hardcheck.hardware_virtual_health,awk -vhardware_virtual_disk_number=`/opt/dell/srvadmin/sbin/omreport storage vdisk controller=0|grep -c "^ID"` -vhardware_virtual_disk=`/opt/dell/srvadmin/sbin/omreport storage vdisk controller=0|awk '/^Status/{if($NF=="Ok") count+=1}END{print count}'` 'BEGIN{if(hardware_virtual_disk_number==hardware_virtual_disk) {print 1} else {print 0}}'

#UserParameter=check.zichan.system,/bin/cat /etc/redhat-release
#UserParameter=check.zichan.bmc_name,/bin/echo iDRAC_info
#UserParameter=check.dev[*],/bin/bash /Data/apps/zabbix/bin/custom/check-dev.sh $1 $2 $3


#hardware detail
# Temperature readings by probe name. Each item walks probe indexes 0-3 of
# `omreport chassis temps index=N`; for every probe whose report contains the
# given name it prints the first word after the colon on the first "Reading" line.
# Only POSIX sh constructs are used (explicit index list, `case` match) because the
# Zabbix agent runs UserParameter commands through /bin/sh, and omreport is invoked
# once per index with its output reused for both the name match and the reading.
UserParameter=hardwaredetail.cpu1_temp,for i in 0 1 2 3; do out=$(/opt/dell/srvadmin/sbin/omreport chassis temps index=$i); case "$out" in *CPU1*) echo $(printf '%s\n' "$out"|grep Reading|cut -d ':' -f 2|awk '{if (NR==1) print $1}') ;; esac; done
UserParameter=hardwaredetail.cpu2_temp,for i in 0 1 2 3; do out=$(/opt/dell/srvadmin/sbin/omreport chassis temps index=$i); case "$out" in *CPU2*) echo $(printf '%s\n' "$out"|grep Reading|cut -d ':' -f 2|awk '{if (NR==1) print $1}') ;; esac; done
UserParameter=hardwaredetail.system_board_inlet_temp,for i in 0 1 2 3; do out=$(/opt/dell/srvadmin/sbin/omreport chassis temps index=$i); case "$out" in *Inlet*) echo $(printf '%s\n' "$out"|grep Reading|cut -d ':' -f 2|awk '{if (NR==1) print $1}') ;; esac; done
UserParameter=hardwaredetail.system_board_exhaust_temp,for i in 0 1 2 3; do out=$(/opt/dell/srvadmin/sbin/omreport chassis temps index=$i); case "$out" in *Exhaust*) echo $(printf '%s\n' "$out"|grep Reading|cut -d ':' -f 2|awk '{if (NR==1) print $1}') ;; esac; done
#UserParameter=hardwaredetail.memory_detail_status,for ((i=0;i<`/opt/dell/srvadmin/sbin/omreport  chassis memory|grep "Slots Used"|awk -F: '{if (NR==1) print $2}'|sed 's/^[ \t]*//g'`;i++));do if [[ `/opt/dell/srvadmin/sbin/omreport  chassis memory index=$i|grep "Status"|awk -F: '{if (NR==1) print $2}'|sed 's/^[ \t]*//g'` != "Ok" ]]; then echo `/opt/dell/srvadmin/sbin/omreport  chassis memory index=$i|grep "Device Name"|awk -F: '{if (NR==1) print $2}'|sed 's/^[ \t]*//g'`" memory is bad; and errer code is "`/opt/dell/srvadmin/sbin/omreport  chassis memory index=$i|grep "Failures"|awk -F: '{if (NR==1) print $2}'|sed 's/^[ \t]*//g'`;else echo "OK"; fi;done
#UserParameter=hardwaredetail.memory_detail_status,for i in `seq 0 $[$(/opt/dell/srvadmin/sbin/omreport  chassis memory|grep "Slots Used"|awk -F: '{if (NR==1) print $2}'|sed 's/^[ \t]*//g')-1]`;do if [[ `/opt/dell/srvadmin/sbin/omreport  chassis memory index=$i|grep "Status"|awk -F: '{if (NR==1) print $2}'|sed 's/^[ \t]*//g'` != "Ok" ]]; then echo `/opt/dell/srvadmin/sbin/omreport  chassis memory index=$i|grep "Device Name"|awk -F: '{if (NR==1) print $2}'|sed 's/^[ \t]*//g'`" memory is bad; and errer code is "`/opt/dell/srvadmin/sbin/omreport  chassis memory index=$i|grep "Failures"|awk -F: '{if (NR==1) print $2}'|sed 's/^[ \t]*//g'`;else echo "OK"; fi;done
# Memory detail: prints "OK" when the last field of the "Health" line of
# `omreport chassis memory` matches "Ok". Otherwise it loops over indexes
# 0..(first "Slots Used" value - 1) and, for each index whose first "Status" value
# is not "Ok", prints "<Device Name> memory is bad; and errer code is <Failures>".
# omreport is called up to three times per failing index.
UserParameter=hardwaredetail.memory_detail_status,if [[ `/opt/dell/srvadmin/sbin/omreport chassis memory|awk '/^Health/{print $NF}'` =~ "Ok" ]];then echo "OK";else for i in `seq 0 $[$(/opt/dell/srvadmin/sbin/omreport  chassis memory|grep "Slots Used"|awk -F: '{if (NR==1) print $2}'|sed 's/^[ \t]*//g')-1]`;do if [[ `/opt/dell/srvadmin/sbin/omreport  chassis memory index=$i|grep "Status"|awk -F: '{if (NR==1) print $2}'|sed 's/^[ \t]*//g'` != "Ok" ]]; then echo `/opt/dell/srvadmin/sbin/omreport  chassis memory index=$i|grep "Device Name"|awk -F: '{if (NR==1) print $2}'|sed 's/^[ \t]*//g'`" memory is bad; and errer code is "`/opt/dell/srvadmin/sbin/omreport  chassis memory index=$i|grep "Failures"|awk -F: '{if (NR==1) print $2}'|sed 's/^[ \t]*//g'`; fi;done;fi
# Hardware (ESM) log: inspects the last 4 lines of `omreport system esmlog`. If the
# first "Severity" value found there is anything other than "Ok" (including when no
# Severity line is present), those 4 lines are echoed on one line; otherwise "OK".
UserParameter=hardwaredetail.hardlog_detail_status,if [[ `/opt/dell/srvadmin/sbin/omreport system esmlog|tail -n 4|grep "Severity"|awk -F: '{if (NR==1) print $2}'|sed 's/^[ \t]*//g'` != "Ok" ]];then echo `/opt/dell/srvadmin/sbin/omreport system esmlog|tail -n 4`;else echo "OK";fi
# Alert log: same check as the ESM log item, applied to the last 6 lines of
# `omreport system alertlog`.
UserParameter=hardwaredetail.alertlog_detail_status,if [[ `/opt/dell/srvadmin/sbin/omreport system alertlog|tail -n 6|grep "Severity"|awk -F: '{if (NR==1) print $2}'|sed 's/^[ \t]*//g'` != "Ok" ]];then echo `/opt/dell/srvadmin/sbin/omreport system alertlog|tail -n 6`;else echo "OK";fi
#UserParameter=hardwaredetail.fan_fail_status,for i in `seq 0 $[$(/opt/dell/srvadmin/sbin/omreport chassis fans|grep "Index"|wc -l)-1]` ; do if [[ `/opt/dell/srvadmin/sbin/omreport  chassis fans index=$i|grep "Status"` != *Ok* ]]; then echo `/opt/dell/srvadmin/sbin/omreport  chassis temps index=$i`;else echo "OK"; fi; done
# Fan failure detail: prints "OK" when the number of lines starting with "Index"
# equals the number of "Status" lines ending in "Ok" in `omreport chassis fans`.
# Otherwise it walks fan indexes 0..(count-1) and, for each fan whose Status line
# does not contain "Ok", echoes that fan's own report from `chassis fans index=N`,
# so the output describes the failed fan rather than a temperature probe.
UserParameter=hardwaredetail.fan_fail_status,if [[ `/opt/dell/srvadmin/sbin/omreport chassis fans|grep -c "^Index"` == `/opt/dell/srvadmin/sbin/omreport chassis fans|awk '/^Status/{if($NF=="Ok") count+=1}END{print count}'` ]];then echo "OK";else for i in `seq 0 $(( $(/opt/dell/srvadmin/sbin/omreport chassis fans|grep -c "^Index") - 1 ))` ; do if [[ `/opt/dell/srvadmin/sbin/omreport  chassis fans index=$i|grep "Status"` != *Ok* ]]; then echo `/opt/dell/srvadmin/sbin/omreport  chassis fans index=$i`; fi; done; fi
#UserParameter=hardwaredetail.fan_fail_status,for ((i=0;i<`/opt/dell/srvadmin/sbin/omreport chassis fans|grep "Index"|wc -l`;i++)) ; do if [[ `/opt/dell/srvadmin/sbin/omreport  chassis fans index=$i|grep "Status"` != *Ok* ]]; then echo `/opt/dell/srvadmin/sbin/omreport  chassis temps index=$i`;else echo "OK"; fi; done
#UserParameter=hardwaredetail.power_fail_status,for ((i=0;i<`/opt/dell/srvadmin/sbin/omreport chassis pwrsupplies|grep "Index"|wc -l`;i++)) ; do if [[ `/opt/dell/srvadmin/sbin/omreport  chassis pwrsupplies index=$i|grep "Status"` != *Ok* ]]; then echo `/opt/dell/srvadmin/sbin/omreport  chassis pwrsupplies index=$i`;else echo "OK"; fi; done
#UserParameter=hardwaredetail.cpu_fail_status,for ((i=0;i<`/opt/dell/srvadmin/sbin/omreport chassis processors|grep "Index"|wc -l`;i++)) ; do if [[ `/opt/dell/srvadmin/sbin/omreport  chassis processors index=$i|grep "Status"` != *Ok* ]]; then echo `/opt/dell/srvadmin/sbin/omreport  chassis processors index=$i`;else echo "OK"; fi; done
#UserParameter=hardwaredetail.temp_fail_status,for ((i=0;i<`/opt/dell/srvadmin/sbin/omreport chassis temps|grep "Index"|wc -l`;i++)) ; do if [[ `/opt/dell/srvadmin/sbin/omreport  chassis temps index=$i|grep "Status"` != *Ok* ]]; then echo `/opt/dell/srvadmin/sbin/omreport  chassis temps index=$i`;else echo "OK"; fi; done
# Temperature failure detail: prints "OK" when the last field of the line starting
# with "Main" in `omreport chassis temps` is "Ok". Otherwise it walks probe indexes
# 0..(number of lines containing "Index" - 1) and, for each probe whose "Status"
# output does not contain "Ok", echoes that probe's full report on one line.
UserParameter=hardwaredetail.temp_fail_status,if [[ `/opt/dell/srvadmin/sbin/omreport chassis temps|awk '/^Main/{print $NF}'` == "Ok" ]];then echo "OK";else for ((i=0;i<`/opt/dell/srvadmin/sbin/omreport chassis temps|grep "Index"|wc -l`;i++)) ; do if [[ `/opt/dell/srvadmin/sbin/omreport  chassis temps index=$i|grep "Status"` != *Ok* ]]; then echo `/opt/dell/srvadmin/sbin/omreport  chassis temps index=$i`; fi; done;fi
# Disk failure detail (MegaCli): collects every "Firmware state" line once, then
# removes the healthy states ("Online, Spun Up" / "Hotspare, Spun Up").
# Prints "OK" only when at least one state line was read and none remain after
# filtering; otherwise echoes the remaining (unhealthy) state lines on one line.
# Testing the filtered remainder, instead of regex-matching the whole list, means a
# single healthy disk can no longer mask failed ones. If MegaCli returns nothing,
# an empty value is printed rather than "OK".
UserParameter=hardwaredetail.disk_fail_status,states=$(sudo /opt/MegaRAID/MegaCli/MegaCli64 -PDList -aALL -NoLog|grep "Firmware state"); bad=$(printf '%s\n' "$states"|grep -v -E "Online, Spun Up|Hotspare, Spun Up"); if [[ -n "$states" && -z "$bad" ]]; then echo "OK";else echo $bad;fi

zabbix效果:



僅僅記錄於此,有的需要優化,暫不打算優化,腳本需要修改修改。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章