nagios監控配置

作爲Linux系統管理員，系統的狀態不可能隨時查看，流量異常，負載突然增高，可能引起事故的發生。通過監控軟件可以自動監控系統狀態，發現異常就會報警，也可通過腳本監控。

1、Nagios

是開源軟件，免費使用，可以監控主機狀態，Windows、Linux、打印機等設備都可以監控。

基於web界面，登錄網站查看各項指標。

支持短信或郵件通知。

可以自定義腳本實現自定義化監控。

官網www.nagios.org

2、安裝nagios服務端

分爲服務端（centos6.4 192.168.0.105）和客戶端（centos6.4 192.168.0.104）。客戶端監控主機的狀態，數據上報給服務端，服務端去處理數據。

安裝擴展源

[root@client ~]# rpm -ivh http://www.lishiming.net/data/p_w_upload/forum/month_1211/epel-release-6-7.noarch.rpm
//有些系統默認已經安裝

安裝nagios

[root@client ~]# yum install -y httpd nagios nagios-plugins nagios-plugins-all nrpe nagios-plugins-nrpe

設置http登錄密碼

[root@client ~]# htpasswd -c /etc/nagios/passwd nagiosadmin
New password:
Re-type new password:
Adding password for user nagiosadmin

查看配置文件

[root@client ~]# vim /etc/nagios/nagios.cfg

檢測配置文件是否出錯

[root@client ~]# nagios -v /etc/nagios/nagios.cfg

啓動服務

[root@client ~]# service httpd start
Starting httpd: httpd: apr_sockaddr_info_get() failed for client
httpd: Could not reliably determine the server's fully qualified domain name, using 127.0.0.1 for ServerName
                                                           [  OK  ]
[root@client ~]# service nagios start
Starting nagios: done.

瀏覽器上訪問:http://192.168.0.105/nagios

3、安裝nagios客戶端

安裝擴展源

[root@localhost ~]# rpm -ivh http://www.lishiming.net/data/p_w_upload/forum/month_1211/epel-release-6-7.noarch.rpm

安裝nagios

[root@localhost ~]# yum install -y nagios-plugins nagios-plugins-all nrpe nagios-plugins-nrpe

修改配置文件

[root@localhost ~]# vim /etc/nagios/nrpe.cfg
allowed_hosts=127.0.0.1,192.168.0.105
dont_blame_nrpe=1

啓動客戶端

[root@localhost ~]# /etc/init.d/nrpe start
Starting nrpe:                                             [  OK  ]

4、監控中心添加被監控主機（服務端）

[root@client ~]# cd /etc/nagios/conf.d/
[root@client conf.d]# vim 192.168.0.104.cfg   //以客戶端IP命名
define host{
        use                     linux-server            ; Name of host template to use
                                                        ; This host definition will inherit all variables that are defined
                                                        ; in (or inherited by) the linux-server host template definition.
        host_name               192.168.0.104
        alias                   0.104
        address                 192.168.0.104
        }
define service{
        use                     generic-service
        host_name               192.168.0.104
        service_description     check_ping
        check_command           check_ping!100.0,20%!200.0,50%
        max_check_attempts 5
        normal_check_interval 1
}
//監控ping服務
define service{
        use                     generic-service
        host_name               192.168.0.104
        service_description     check_ssh
        check_command           check_ssh
        max_check_attempts      5
        normal_check_interval 1
}
//監控ssh服務
define service{
        use                     generic-service
        host_name               192.168.0.104
        service_description     check_http
        check_command           check_http
        max_check_attempts      5
        normal_check_interval 1
}
//監控http服務

[root@client ~]# cd /etc/nagios/conf.d/
[root@client conf.d]# vim 192.168.0.104.cfg
define host{
        use                     linux-server           
//Name of host template to use
 //This host definition will inherit all variables that are defined
 //in (or inherited by) the linux-server host template definition.
        host_name               192.168.0.12
        alias                   0.12
        address                 192.168.0.12
        }
define service{
        use                     generic-service
        host_name               192.168.0.12
        service_description     check_ping
        check_command           check_ping!100.0,20%!200.0,50%
        max_check_attempts 5     //遇到問題，檢測5次再報警
        normal_check_interval 1   //重新檢測時間間隔，1分鐘
        notification_interval  60   //服務出現異常後，故障一直沒解決，對使用者隔60分鐘發出通知
}
//監控ping服務
define service{
        use                     generic-service
        host_name               192.168.0.12
        service_description     check_ssh
        check_command           check_ssh
        max_check_attempts      5
        normal_check_interval 1
}
//監控ssh服務
define service{
        use                     generic-service
        host_name               192.168.0.12
        service_description     check_http
        check_command           check_http
        max_check_attempts      5
        normal_check_interval 1
}
//監控http服務

5、實現監控遠程的服務（服務端）

[root@client conf.d]# vim /etc/nagios/objects/commands.cfg
//添加下面的語句
define command{
        command_name    check_nrpe
        command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
        }
[root@client conf.d]# vim /etc/nagios/conf.d/192.168.0.104.cfg
define service{
        use     generic-service
        host_name       192.168.0.104
        service_description     check_load
        check_command           check_nrpe!check_load
//check_load是遠程主機上的檢測腳本
        max_check_attempts 5
        normal_check_interval 1
}
define service{
        use     generic-service
        host_name       192.168.0.104
        service_description     check_disk_hda1
        check_command           check_nrpe!check_hda1
        max_check_attempts 5
        normal_check_interval 1
}
define service{
        use     generic-service
        host_name       192.168.0.104
        service_description     check_disk_hda2
        check_command           check_nrpe!check_hda2
        max_check_attempts 5
        normal_check_interval 1
}

查看check_load（客戶端）

[root@localhost ~]# vim /etc/nagios/nrpe.cfg
command[check_users]=/usr/lib/nagios/plugins/check_users -w 5 -c 10
command[check_load]=/usr/lib/nagios/plugins/check_load -w 15,10,5 -c 30,25,20
command[check_hda1]=/usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /dev/hda1
//hda1修改成sda1
//可用空間剩餘20%時發出警告（warning），剩餘10%時發出嚴重告警（critical，頁面顯示爲紅色）
command[check_zombie_procs]=/usr/lib/nagios/plugins/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/lib/nagios/plugins/check_procs -w 150 -c 200

[root@localhost ~]# /usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /dev/sda1
DISK OK - free space: /boot 429 MB (93% inode=99%);| /boot=29MB;387;435;0;484
//可以自定義寫監控腳本，只要產生的結果格式是一致的

定義一個check_hda2

[root@localhost ~]# vim /etc/nagios/nrpe.cfg   //客戶端
//添加下面一句
command[check_hda2]=/usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /dev/sda2

客戶端、服務端重啓服務，刷新頁面測試

查看監控服務

[root@client conf.d]# ls /usr/lib/nagios/plugins/
check_breeze    check_icmp         check_nrpe      check_smtp
check_by_ssh    check_ide_smart    check_nt        check_snmp
check_clamd     check_imap         check_ntp       check_spop
check_cluster   check_ircd         check_ntp_peer  check_ssh
check_dhcp      check_jabber       check_ntp.pl    check_ssmtp
check_dig       check_ldap         check_ntp_time  check_swap
check_disk      check_ldaps        check_nwstat    check_tcp
check_disk_smb  check_load         check_oracle    check_time
check_dns       check_log          check_overcr    check_udp
check_dummy     check_mailq        check_pgsql     check_ups
check_file_age  check_mrtg         check_ping      check_users
check_flexlm    check_mrtgtraf     check_pop       check_wave
check_fping     check_mysql        check_procs     eventhandlers
check_ftp       check_mysql_query  check_real      negate
check_game      check_nagios       check_rpc       urlize
check_hpjd      check_nntp         check_sensors   utils.pm
check_http      check_nntps        check_simap     utils.sh
//監控命令，大多是二進制文件

6、配置郵件告警

[root@client conf.d]# vim /etc/nagios/objects/contacts.cfg
define contact{
        contact_name               123    //自定義名字
        use                             generic-contact  //模板
        alias                           aming
        email              your_name@example.com //郵件（請換成實際接收告警的郵箱）
        }

自定義告警策略

notifications_enabled
//是否開啓提醒功能。1爲開啓，0爲禁用。一般，這個選項會在主配置文件（nagios.cfg）中定義，效果相同。
notification_interval
//之前剛介紹過，表示重複發送提醒信息的最短間隔時間。默認間隔時間是60分鐘。如果這個值設置爲0，將不會發送重複提醒。
notification_period
//發送提醒的時間段。非常重要的主機（服務）我定義爲7×24，一般的主機（服務）就定義爲上班時間。如果不在定義的時間段內，無論什麼問題發生，都不會發送提醒。
notification_options
//這個參數定義了發送提醒包括的情況：d = 狀態爲DOWN, u = 狀態爲UNREACHABLE , r = 狀態恢復爲OK ,  f = flapping，n=不發送提醒。

nagios監控配置

Kafka存儲機制

aws語音呼叫調用，告警電話

【轉】[C#] WebAPI 防止併發調用二（冥等性）

HTTP URL 詳解

創新工具：2024年開發者必備的一款表格控件（二）

車牌識別控制檯可快速整合二次開發

win7訪問linux上的NFS

LAMP搭建論壇

NFS網絡文件系統

Linux郵件服務

win7掃盲篇--介紹安裝（1）

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結