1. 把 check_cpu.sh 文件放在被監控機器的 libexec 目錄下（例如 /usr/local/nagios/libexec/）。
#!/bin/bash
# Filename: check_cpu.sh
#
# Nagios/NRPE plugin that reports the percentage of CPU in use. The figure is
# derived from the idle value on the CPU summary line of `top -b -n 1`.
#
# Usage:   check_cpu.sh -w <integer> -c <integer>
# Output:  one status line with performance data on stdout.
# Returns: 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN (bad arguments,
#          top not found, or the idle value could not be read).

readonly STATE_OK=0
readonly STATE_WARNING=1
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3

#######################################
# Prints the usage text and exits with the UNKNOWN state.
# Outputs: usage text on stdout
#######################################
usage() {
  printf '\n\tThis plugin shows the %% of used CPU, as reported by top\n\n\t%s:\n\t\t-c <integer>\tIf the %% of used CPU is above <integer>, returns CRITICAL state\n\t\t-w <integer>\tIf the %% of used CPU is below CRITICAL and above <integer>, returns WARNING state\n\n' "$0"
  exit "$STATE_UNKNOWN"
}

#######################################
# Prints "ERROR: <message>" followed by the usage text, then exits UNKNOWN.
# The message goes to stdout on purpose: plugin output is what the monitoring
# side displays, so an error on stderr would be invisible to the operator.
# Arguments: $1 - error message
#######################################
fail_usage() {
  printf 'ERROR: %s\n' "$1"
  usage
}

#######################################
# Prints the idle CPU percentage found on the first "Cpu" line of top that
# carries an "id" field. Accepts both the "99.5%id" and the "99.5 id" layout.
# LC_ALL=C keeps the decimal separator a dot so the fields can be split on
# commas.
# Outputs: the idle percentage (e.g. 99.5), or nothing when it is not found
#######################################
read_idle() {
  LC_ALL=C top -b -n 1 | awk '
    /Cpu/ {
      sub(/^[^:]*:/, "")
      count = split($0, part, ",")
      for (i = 1; i <= count; i++) {
        if (part[i] ~ /id[ \t]*$/) {
          gsub(/[^0-9.]/, "", part[i])
          print part[i]
          exit
        }
      }
    }
  '
}

#######################################
# Parses the thresholds, samples the CPU and reports the resulting state.
# Arguments: the script's command-line arguments
# Outputs:   "<STATE> - CPU used=..% idle=..% | <perfdata>" on stdout
#######################################
main() {
  local warning="" critical="" opt idle used msg status
  local -r integer_re='^[0-9]+$'
  local -r number_re='^[0-9]+([.][0-9]+)?$'

  # Getting parameters (-h and any unknown option both show the usage text):
  while getopts "w:c:h" opt; do
    case "$opt" in
      w) warning=$OPTARG ;;
      c) critical=$OPTARG ;;
      *) usage ;;
    esac
  done

  # Checking parameters:
  if [[ -z "$warning" || -z "$critical" ]]; then
    fail_usage "You must specify warning and critical levels"
  fi
  if ! [[ "$warning" =~ $integer_re && "$critical" =~ $integer_re ]]; then
    fail_usage "warning and critical levels must be integers"
  fi
  # Force base 10 so a value such as "08" is not rejected as invalid octal.
  warning=$((10#$warning))
  critical=$((10#$critical))
  if (( warning >= critical )); then
    fail_usage "critical level must be highter than warning level"
  fi

  # Assuring that the needed tool exists:
  if ! command -v top >/dev/null 2>&1; then
    printf 'UNKNOWN - top is required by this plugin but was not found\n'
    exit "$STATE_UNKNOWN"
  fi

  # Doing the actual check:
  idle=$(read_idle)
  if ! [[ "$idle" =~ $number_re ]]; then
    printf 'UNKNOWN - could not read the idle CPU percentage from top\n'
    exit "$STATE_UNKNOWN"
  fi
  # idle may be fractional; round the used share to the nearest integer so it
  # can be compared with the integer thresholds.
  used=$(LC_ALL=C awk -v idle="$idle" 'BEGIN { printf "%d", 100 - idle + 0.5 }')

  # Comparing the result and setting the correct level:
  if (( used >= critical )); then
    msg="CRITICAL"
    status=$STATE_CRITICAL
  elif (( used >= warning )); then
    msg="WARNING"
    status=$STATE_WARNING
  else
    msg="OK"
    status=$STATE_OK
  fi

  # Printing the results:
  printf "%s - CPU used=%s%% idle=%s%% | 'CPU Usage'=%s%%;%s;%s;\n" \
    "$msg" "$used" "$idle" "$used" "$warning" "$critical"

  # Bye!
  exit "$status"
}

main "$@"
然後修改執行權限，例如：chmod +x /usr/local/nagios/libexec/check_cpu.sh
2. 在監控主機（Nagios 伺服器）的設定中加入如下服務定義，我的是加在 localhost.cfg 中：
# Service definition for host "153": runs the check_nrpe command with the
# argument check_cpu_233, which is the command name registered in nrpe.cfg on
# the monitored machine.
# NOTE(review): "use generic-service" presumably inherits the remaining
# settings from a template of that name defined elsewhere -- confirm it exists.
# NOTE(review): notification_options w,u,c presumably selects the warning,
# unknown and critical states -- confirm against the Nagios object reference.
define service{
host_name 153
use generic-service
check_command check_nrpe!check_cpu_233
service_description check_cpu_233
notifications_enabled 1
event_handler_enabled 1
max_check_attempts 3
check_interval 5
retry_check_interval 2
notification_options w,u,c
}
3. 在被監控機器上, 在nrpe.cfg 中添加 :
command[check_cpu_233]=/usr/local/nagios/libexec/check_cpu.sh -w 60 -c 80
安裝和使用nrpe可以參考http://blog.csdn.net/li744831579/article/details/22874691