背景:对于某些系统关键服务需要做到进程意外停止后立即重新启动,以尽量降低用户受到的影响,为减少人工干预及缩短故障处理时间,可以使用一些自动化监控工具,这里记录下Linux系统下简单易用的小工具monit。
安装:
yum install monit
常用命令:
monit -V # 查看版本
monit -t # 配置文件检测
service monit start # 服务启动
monit # 启动monit daemon
monit -c /var/monit/monitrc # 启动monit daemon时指定配置文件
monit reload # 重新加载配置文件
chkconfig monit on # 加入开机启动(SysV init系统,如CentOS 6)
systemctl enable monit # 加入开机启动(systemd系统,如CentOS 7)
monit status # 查看所有服务状态
monit status nginx # 查看nginx服务状态
monit stop all # 停止所有服务
monit stop nginx # 停止nginx服务
monit start all # 启动所有服务
monit start nginx # 启动nginx服务
monit unmonitor tomcat #解除tomcat监控
monit monitor tomcat #加上tomcat监控
配置:
/etc/monit.conf # 主配置文件
/etc/monit.d/ # 单独配置各项服务
/var/log/monit # 默认日志路径
配置文件示例:
##vim /etc/monit.conf
set logfile /var/log/monit # 自定义日志路径
set httpd port 8080 and # 自定义web端端口,可以使用IP:端口来访问monit监控页面
allow ××.××.××.×× # 允许访问该页面的IP或IP段
allow localhost
allow admin:monit # 登录用户'admin'密码'monit'
set mailserver smtp.gmail.com port 587 #配置邮件发送服务器
username "user" password "pwd" #配置邮箱用户密码
using tlsv1
set mail-format {
from: monit@example.com #发件人邮箱(示例地址,请替换为实际邮箱)
subject: Monit:$ACTION [$SERVICE][$HOST] $DESCRIPTION #邮件主题
message: #邮件内容
[$SERVICE] $EVENT
Date: $DATE
Action: $ACTION
Host: $HOST
Description: $DESCRIPTION
Monit }
#set alert [myemail] with reminder on 1 cycles
set alert [myemail] #设置接收提醒的邮箱
set daemon 30 #检测周期:每30秒检查一次
with start delay 10 #monit启动后延迟10秒再开始第一次检查
set idfile /var/.monit.id
set eventqueue
basedir /var/monit
include /etc/monit.d/*.monit #所有需要监控的进程配置在这里
配置tomcat监控举例:
##tomcat默认的启动脚本不会生成pid文件,需要手工在catalina.sh中设置CATALINA_PID指定pid文件路径
vim /opt/tomcat8/bin/catalina.sh
# Get standard environment variables
PRGDIR=`dirname "$PRG"`
CATALINA_PID=$PRGDIR/catalina.pid # 加上这一行
##这样重启tomcat后就会生成pid文件
service tomcat8 start
Starting Tomcat8
……
Using CATALINA_PID: /opt/tomcat8/bin/catalina.pid
Tomcat started.
##写tomcat进程监控配置
cd /etc/monit.d
vim tomcat.monit # 文件名需以.monit结尾,才能被主配置中的 include /etc/monit.d/*.monit 加载
## check tomcat process
check process tomcat with pidfile /opt/tomcat8/bin/catalina.pid
start program = "/etc/init.d/tomcat8 start"
stop program = "/etc/init.d/tomcat8 stop"
if changed pid then restart # 当进程pid发生变化时重启(进程不存在时monit默认会自动执行start program重启)
if 2 restarts within 3 cycles then unmonitor # 当在3个周期中重启2次后不再监控
其他常用配置:
## check apache process
check process apache with pidfile /etc/httpd/run/httpd.pid
start program = "/etc/init.d/httpd start" with timeout 60 seconds #配置超时阈值
stop program = "/etc/init.d/httpd stop"
if cpu > 60% for 2 cycles then alert #连续2个周期cpu使用率超过60%则邮件告警
if cpu > 80% for 5 cycles then restart #连续5个周期cpu使用率超过80%则重启进程
if memory usage > 80% then alert #内存使用超过80%邮件告警
if totalmem > 200.0 MB for 5 cycles then restart #连续5个周期总内存(含子进程)使用超过200MB则重启进程
if children > 250 then restart #子进程超过250个重启
if loadavg(5min) greater than 10 for 8 cycles then stop #连续8个周期5分钟平均负载超过10则停止进程
if failed host myhost.com port 80 protocol http
#and request "/login"
then restart #web 80端口访问失败则重启
if failed port 443 protocol https with timeout 15 seconds then restart ##web 443端口访问超时15秒则重启
if 3 restarts within 5 cycles then unmonitor #5个周期内重启3次则解除监控
## check nginx process
check process nginx with pidfile /var/run/nginx.pid
start program = "/etc/init.d/nginx start"
stop program = "/etc/init.d/nginx stop"
#if changed pid then restart
if failed port 80 type TCP then restart
## check memcached process
check process memcached with pidfile /var/run/memcached/memcached.pid
start program = "/etc/init.d/memcached start"
stop program = "/etc/init.d/memcached stop"
if changed pid then restart