背景:對於某些系統關鍵服務,需要做到進程意外停止後立即重新啓動,以儘量降低用戶受到的影響。爲減少人工干預及縮短故障處理時間,可以使用一些自動化監控工具,這裏記錄一下Linux系統下簡單易用的小工具monit。
安裝:
yum install monit
常用命令:
monit -V # 查看版本
monit -t # 配置文件檢測
service monit start # 服務啓動
monit # 啓動monit daemon
monit -c /var/monit/monitrc # 啓動monit daemon時指定配置文件
monit reload # 重新加載配置文件
chkconfig monit on 或 systemctl enable monit # 加入開機啓動(前者用於SysV init,後者用於systemd)
monit status # 查看所有服務狀態
monit status nginx # 查看nginx服務狀態
monit stop all # 停止所有服務
monit stop nginx # 停止nginx服務
monit start all # 啓動所有服務
monit start nginx # 啓動nginx服務
monit unmonitor tomcat #解除tomcat監控
monit monitor tomcat #加上tomcat監控
配置:
/etc/monit.conf # 主配置文件
/etc/monit.d/ # 單獨配置各項服務
/var/log/monit # 默認日誌路徑
配置文件示例:
##vim /etc/monit.conf
set logfile /var/log/monit # 自定義日誌路徑
set httpd port 8080 and # 自定義web端端口,可以使用IP:端口來訪問monit監控頁面
allow ××.××.××.×× # 允許訪問該頁面的IP或IP段
allow localhost
allow admin:monit # 登錄用戶'admin'密碼'monit'
set mailserver smtp.gmail.com port 587 #配置郵件發送服務器
username "user" password "pwd" #配置郵箱用戶密碼
using tlsv1
set mail-format {
from: monit@$HOST
subject: Monit:$ACTION [$SERVICE][$HOST] $DESCRIPTION #郵件主題
message: #郵件內容
[$SERVICE] $EVENT
Date: $DATE
Action: $ACTION
Host: $HOST
Description: $DESCRIPTION
Monit
}
#set alert [myemail] with reminder on 1 cycles
set alert [myemail] #設置接收提醒的郵箱
set daemon 30 # 每30秒爲一個檢測週期
with start delay 10 # monit啓動後延遲10秒再開始第一次檢測
set idfile /var/.monit.id
set eventqueue
basedir /var/monit
include /etc/monit.d/*.monit #所有需要監控的進程配置在這裏
配置tomcat監控舉例:
##tomcat默認的配置文件沒有寫pid,需要手工給tomcat設置加上pid路徑
vim /opt/tomcat8/bin/catalina.sh
# Get standard environment variables
PRGDIR=`dirname "$PRG"`
CATALINA_PID=$PRGDIR/catalina.pid # 加上這一行
##這樣重啓tomcat後就會生成pid文件
service tomcat8 start
Starting Tomcat8
……
Using CATALINA_PID: /opt/tomcat8/bin/catalina.pid
Tomcat started.
##寫tomcat進程監控配置
cd /etc/monit.d
vim tomcat.monit # 文件名需以.monit結尾,才能被主配置中的 include /etc/monit.d/*.monit 加載
## check tomcat process
check process tomcat with pidfile /opt/tomcat8/bin/catalina.pid
start program = "/etc/init.d/tomcat8 start"
stop program = "/etc/init.d/tomcat8 stop"
if changed pid then restart # 當進程PID發生變化時重啓(進程不存在時monit默認會自動執行start program)
if 2 restarts within 3 cycles then unmonitor # 當在3個週期中重啓2次後不再監控
其他常用配置:
## check apache process
check process apache with pidfile /etc/httpd/run/httpd.pid
start program = "/etc/init.d/httpd start" with timeout 60 seconds #配置超時閾值
stop program = "/etc/init.d/httpd stop"
if cpu > 60% for 2 cycles then alert #連續2個週期cpu使用率超過60%則郵件告警
if cpu > 80% for 5 cycles then restart #連續5個週期cpu使用率超過80%則重啓進程
if memory usage > 80% then alert #內存使用超過80%郵件告警
if totalmem > 200.0 MB for 5 cycles then restart #進程及其子進程的總內存連續5個週期超過200MB則重啓進程
if children > 250 then restart #子進程超過250個重啓
if loadavg(5min) greater than 10 for 8 cycles then stop #連續8個週期機器5分鐘平均負載超過10則停止進程
if failed host myhost.com port 80 protocol http
#and request "/login"
then restart #web 80端口訪問失敗則重啓
if failed port 443 protocol https with timeout 15 seconds then restart ##web 443端口訪問超時15秒則重啓
if 3 restarts within 5 cycles then unmonitor #5個週期內重啓3次則解除監控
## check nginx process
check process nginx with pidfile /var/run/nginx.pid
start program = "/etc/init.d/nginx start"
stop program = "/etc/init.d/nginx stop"
#if changed pid then restart
if failed port 80 type TCP then restart
## check memcached process
check process memcached with pidfile /var/run/memcached/memcached.pid
start program = "/etc/init.d/memcached start"
stop program = "/etc/init.d/memcached stop"
if changed pid then restart