添加Zabbix监控
将DockerServer添加到zabbix中进行主机层面的监控
# Install the Zabbix 2.2 agent from the official repository and point it at
# the Zabbix server, which is reachable under the alias "zabbix_server".
cd /tmp
wget https://repo.zabbix.com/zabbix/2.2/rhel/7/x86_64/zabbix-release-2.2-1.el7.noarch.rpm
rpm -ivh zabbix-release-2.2-1.el7.noarch.rpm
# "fast" is the makecache argument yum recognises.
yum clean all && yum makecache fast
yum -y install zabbix-agent
# Add the alias only if it is not there yet, so re-running this section does
# not keep appending duplicate lines to /etc/hosts.
grep -qE '[[:space:]]zabbix_server([[:space:]]|$)' /etc/hosts \
  || echo '192.168.1.112 zabbix_server' >> /etc/hosts
cd /etc/zabbix/
# Replace the packaged defaults (127.0.0.1) with the server alias.
sed -i 's/^Server=127.0.0.1/Server=zabbix_server/g' zabbix_agentd.conf
sed -i 's/^ServerActive=127.0.0.1/ServerActive=zabbix_server:10051/g' zabbix_agentd.conf
# Report under this machine's own hostname.
sed -i "s/^\(Hostname=\).*/\1$(hostname)/g" zabbix_agentd.conf
systemctl enable zabbix-agent && systemctl start zabbix-agent
使用Filebeat归集日志到ELK
- 在harbor上保存filebeat镜像
# Copy the Filebeat 7.6.2 image into the private Harbor registry.
# Upstream alternative to the mirror used below:
#   docker pull docker.elastic.co/beats/filebeat:7.6.2
mirror_image=registry.cn-hangzhou.aliyuncs.com/vinc-auto/filebeat:7.6.2
harbor_image=harbor.vincent.com/library/filebeat:7.6.2
docker pull "${mirror_image}"
docker tag "${mirror_image}" "${harbor_image}"
docker push "${harbor_image}"
# Drop both local tags once the image has been pushed.
docker rmi "${mirror_image}" "${harbor_image}"
- 在各个DockerServer之上以容器化部署运行filebeat,将业务日志归集到ELK中
# Run Filebeat as a container on this Docker host. It sees /opt/logs read-only
# and takes its configuration from /opt/filebeat/filebeat.yml on the host.
mkdir -pv /opt/filebeat && cd /opt/filebeat
docker pull harbor.vincent.com/library/filebeat:7.6.2
# Start with a placeholder config containing a single empty line
# (presumably so the file exists before it is bind-mounted).
printf '\n' > filebeat.yml
docker run -d --name=filebeat --user=root \
  -v "${PWD}/filebeat.yml:/usr/share/filebeat/filebeat.yml:ro" \
  -v "/opt/logs:/opt/logs:ro" \
  harbor.vincent.com/library/filebeat:7.6.2 filebeat
容器自维护
- 创建环境自维护容器,自动配置filebeat的配置文件并启动filebeat容器
# Create the scripts directory and open an editor for UpdateFilebeat.sh;
# the script content follows.
mkdir -pv /opt/scripts && cd /opt/scripts
vi UpdateFilebeat.sh
#!/bin/bash
#
# UpdateFilebeat.sh
#
# Keeps the filebeat container's configuration in sync with the catalina.out
# files found under /opt/logs. If at least one catalina.out is not mentioned
# in the current configuration, the configuration is regenerated for ALL
# discovered logs and the filebeat container is stopped and started again.
#
# Every log becomes one input tagged with fields.type=<project> plus one
# Elasticsearch index rule "<project>-catalina.out-<date>", where <project>
# is the directory directly below /opt/logs.
WDir=/opt/scripts
Log=${WDir}/main.log
LogRoot=/opt/logs/
FilebeatConf=/opt/filebeat/filebeat.yml

#######################################
# Print the project name for a log path.
# Arguments: $1 - absolute path such as /opt/logs/<project>/.../catalina.out
# Outputs:   the 4th '/'-separated field, i.e. <project>
#######################################
project_name() {
  awk -F'/' '{print $4}' <<< "$1"
}

#######################################
# Print a complete filebeat.yml for the given log files.
# Arguments: $@ - absolute paths of the catalina.out files to collect
# Outputs:   YAML on stdout (nesting is significant, so the here-documents
#            below are deliberately flush-left)
#######################################
render_config() {
  local path

  printf 'filebeat.inputs:\n'
  for path in "$@"; do
    # "type" is the documented key for selecting the log input in Filebeat 7.x.
    cat <<EOF
- type: log
  paths:
    - ${path}
  fields:
    type: "$(project_name "${path}")"
  multiline.pattern: "^[^[:blank:]]"
  # multiline.pattern: '^[0-2][0-9]:[0-5][0-9]:[0-5][0-9]'
  multiline.negate: true
  multiline.match: after
  multiline.timeout: 10s
EOF
  done

  cat <<EOF
output.elasticsearch:
  hosts: ["192.168.1.250:9200"]
  indices:
EOF
  for path in "$@"; do
    cat <<EOF
    - index: "$(project_name "${path}")-catalina.out-%{+yyyy.MM.dd}"
      when.equals:
        fields.type: "$(project_name "${path}")"
EOF
  done
}

#######################################
# Regenerate the config and restart filebeat when a log is not collected yet.
# Globals:   LogRoot, FilebeatConf, Log (read)
# Returns:   0 when nothing had to be done or the update ran, 1 if no
#            temporary file could be created
#######################################
main() {
  local -a logs=()
  local path tmp
  local stale=0

  # Search once and reuse the result, so the check and the generated config
  # are guaranteed to be based on the same set of files.
  mapfile -t logs < <(find "${LogRoot}" -name "catalina.out")

  for path in "${logs[@]}"; do
    # -F: the path is a literal string, not a regular expression. A missing
    # or unreadable config also counts as "not collected yet".
    if ! grep -qF -- "${path}" "${FilebeatConf}"; then
      stale=1
    fi
  done
  (( stale )) || return 0

  echo "$(date +%F_%T) 存在未归集的web日志,重新配置filebeat并重启" >> "${Log}"

  tmp=$(mktemp /tmp/filebeat.yml.XXXXXX) || return 1
  render_config "${logs[@]}" > "${tmp}"
  # mktemp creates the file 0600 and cp -a preserves the mode; keep the
  # installed config world-readable as before.
  chmod 0644 "${tmp}"

  docker stop filebeat
  # Overwrite the existing file in place with cp (not mv): the container
  # bind-mounts this single file, and replacing the file by rename would
  # presumably leave the container attached to the old one.
  /bin/cp -av "${tmp}" "${FilebeatConf}"
  docker start filebeat
  rm -f -- "${tmp}"
}

main "$@"
- 继续添加脚本,对catalina.out日志进行日切归档,并保留一周数据
- 因为有些项目日志过大,因此日切改为每小时切割一次
- catalina.out日志切割后,filebeat容器需要重启,否则会造成日志同步错误
# Create the scripts directory and open an editor for CatalinaLogArchive.sh;
# the script content follows.
mkdir -pv /opt/scripts && cd /opt/scripts
vi CatalinaLogArchive.sh
#!/bin/bash
#
# CatalinaLogArchive.sh
#
# Rotates every catalina.out under /opt/logs by copy-and-truncate: the current
# content is copied to catalina.out.<timestamp>, the live file is reset, and
# the copy is compressed to catalina.out.<timestamp>.tar.gz. Archives and
# Tomcat access logs older than KeepDays days are deleted. The filebeat
# container is restarted afterwards so that it picks up the truncated files.
WDir=/opt/scripts
Log=${WDir}/main.log
LogRoot=/opt/logs/
KeepDays=7

#######################################
# Rotate a single catalina.out and prune old archives next to it.
# Globals:   KeepDays (read)
# Arguments: $1 - path of the catalina.out file
#            $2 - timestamp suffix used for the archive name
# Returns:   non-zero if the directory cannot be entered or the copy fails;
#            in both cases the live log is left untouched
#######################################
archive_catalina_log() {
  local log_path=$1
  local stamp=$2
  local dir
  dir=$(dirname -- "${log_path}")

  # Run in a subshell so the directory change never leaks to the caller.
  (
    cd "${dir}" || exit 1
    # Never truncate unless the copy succeeded, otherwise the log is lost.
    cp catalina.out "catalina.out.${stamp}" || exit 1
    echo "" > catalina.out
    tar -czf "catalina.out.${stamp}.tar.gz" --remove-files "catalina.out.${stamp}"
    find . -name "catalina.out.*" -type f -mtime +"${KeepDays}" -exec rm -rf {} +
  )
}

main() {
  local -a logs=()
  local log_path stamp

  # One timestamp for the whole run, so all archives of a run share a suffix.
  stamp=$(date +%Y%m%d.%H.%M.%S)
  mapfile -t logs < <(find "${LogRoot}" -name "catalina.out")

  for log_path in "${logs[@]}"; do
    archive_catalina_log "${log_path}" "${stamp}" \
      || echo "$(date +%F_%T) failed to archive ${log_path}" >> "${Log}"
  done

  # The access-log cleanup covers the whole tree; once per run is enough.
  find "${LogRoot}" -name "localhost_access_log*.txt" -mtime +"${KeepDays}" -exec rm -rf {} +

  docker stop filebeat
  docker start filebeat
}

main "$@"
- 继续添加脚本,对catalina.out进行监控:日志超过300秒未刷新时,告警到企业微信
- 将其添加到分钟级别告警中(由main.sh每60秒调用一次)
# Create the scripts directory and open an editor for CatalinaLogMonitor.sh;
# the script content follows.
mkdir -pv /opt/scripts && cd /opt/scripts
vi CatalinaLogMonitor.sh
#!/bin/bash
#
# CatalinaLogMonitor.sh
#
# Checks every catalina.out under /opt/logs. A file that has not been modified
# for more than MaxIdle seconds is recorded in the shared log and, unless its
# project is listed in Ignore, reported to the WeChat Work webhook.
WDir=/opt/scripts
Log=${WDir}/main.log
Ignore='XXXX'
LogRoot=/opt/logs/
MaxIdle=300
WebhookUrl='https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=XXXXXX'

#######################################
# Derive the project name from a log directory name by dropping its last two
# '-'-separated components (a-b-c-d -> a-b). Names with fewer than three
# components are returned unchanged.
# Arguments: $1 - directory name
# Outputs:   project name on stdout
#######################################
project_of() {
  awk -F'-' '{
    keep = (NF > 2) ? NF - 2 : NF
    out = $1
    for (i = 2; i <= keep; i++) out = out "-" $i
    print out
  }' <<< "$1"
}

#######################################
# Tell whether a project is exempt from alerting.
# Globals:   Ignore (read)
# Arguments: $1 - project name
# Returns:   0 if the name occurs literally inside Ignore, 1 otherwise
#            (an empty name is never ignored)
#######################################
is_ignored() {
  [[ -n "$1" && "${Ignore}" == *"$1"* ]]
}

#######################################
# Post a text alert to the webhook.
# Globals:   WebhookUrl (read)
# Arguments: $1 - message text, appended after the fixed alert heading
#######################################
send_alert() {
  # -m 10: do not let a hanging HTTP request block the caller indefinitely.
  curl -s -m 10 "${WebhookUrl}" \
    -H "Content-Type: application/json" \
    -d "{\"msgtype\": \"text\", \"text\": {\"content\": \"XXX项目告警:\n$1\"}}"
}

main() {
  local -a logs=()
  local log_path name mtime now idle stamp

  mapfile -t logs < <(find "${LogRoot}" -name "catalina.out")

  for log_path in "${logs[@]}"; do
    # The directory directly below /opt/logs names the deployment.
    name=$(awk -F'/' '{print $4}' <<< "${log_path}")
    # Epoch seconds straight from stat: independent of locale and output
    # layout. Skip files that vanished since the search.
    mtime=$(stat -c %Y -- "${log_path}") || continue
    now=$(date +%s)
    idle=$(( now - mtime ))
    (( idle > MaxIdle )) || continue

    stamp=$(date -d "@${now}" +%F_%T)
    echo "${stamp} ${name} catalina.out 无日志超过${MaxIdle}秒:${idle}s" >> "${Log}"
    if is_ignored "$(project_of "${name}")"; then
      echo "${stamp} ${name} 项目无需告警" >> "${Log}"
    else
      echo "${stamp} ${name} 项目告警到企业微信" >> "${Log}"
      send_alert "${stamp} ${name} catalina.out 无日志超过${MaxIdle}秒:${idle}s"
    fi
  done
}

main "$@"
- 部署main脚本
# Seed the period markers that main.sh compares against, reset the shared log
# and open an editor for main.sh. main.sh reads these files from /opt/scripts,
# so change there explicitly instead of relying on the current directory.
mkdir -pv /opt/scripts && cd /opt/scripts
echo "$(($(date +%s)/30))">flag30
echo "$(($(date +%s)/60))">flag60
echo "$(($(date +%s)/3600))">flag3600
rm -rf main.log && touch main.log
vi main.sh
#!/bin/bash
#
# main.sh
#
# Scheduler tick, meant to be called in a loop every few seconds. For each
# period (30 s, 60 s, 3600 s) the current period number (epoch / period) is
# compared with the number stored in ${WDir}/flag<period>. When they differ,
# the stored number is updated and the task script for that period is run, so
# each task runs once per period.
WDir=/opt/scripts
Log=${WDir}/main.log
CurE=$(date +%s)

#######################################
# Run a task script if a new period has started since its last run.
# Globals:   WDir, Log, CurE (read)
# Arguments: $1 - period length in seconds (also the flag file suffix)
#            $2 - script name inside ${WDir}
#######################################
run_if_due() {
  local interval=$1
  local script=$2
  local flag_file=${WDir}/flag${interval}
  local current=$(( CurE / interval ))
  local previous

  # A missing or empty flag file reads as "" and therefore counts as "due";
  # the flag is then recreated below instead of blocking the task for good.
  previous=$(cat "${flag_file}" 2>/dev/null)

  if [[ "${current}" != "${previous}" ]]; then
    echo "$(date -d "@${CurE}" +%F_%T) ${interval}秒 Cur${interval}: ${current} F${interval}: ${previous}" >> "${Log}"
    echo "${current}" > "${flag_file}"
    bash "${WDir}/${script}"
  fi
}

run_if_due 30 UpdateFilebeat.sh
run_if_due 60 CatalinaLogMonitor.sh
run_if_due 3600 CatalinaLogArchive.sh
- 启动容器自维护
# Start the long-running "main" maintenance container. The host's docker
# client binary and daemon socket are mounted so that the scripts under
# /opt/scripts can stop and start containers on this host; libltdl.so.7 is
# presumably a shared library that the docker client needs. The container
# copies the Asia/Shanghai zone file to /etc/localtime, then calls main.sh
# every 10 seconds.
docker run -it -d \
--restart=always -u root --name main \
-v /usr/bin/docker:/usr/bin/docker \
-v /var/run/docker.sock:/var/run/docker.sock \
-v /usr/lib64/libltdl.so.7:/usr/lib/x86_64-linux-gnu/libltdl.so.7 \
-v /opt:/opt centos:7 \
/bin/sh -c "/bin/cp -av /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \
while true;do bash /opt/scripts/main.sh;sleep 10;done"
# Follow the maintenance log. tail -f is used because tailf is deprecated and
# no longer shipped by newer util-linux releases.
tail -f /opt/scripts/main.log
- 继续添加脚本,对tomcat进行监控,告警到企业微信
- 功能未完成,只是简单举例
# Fetch a JDK 1.8 build to obtain the jstat binary: the application containers
# ship only a JRE, which has no jstat.
cd /tmp/
wget --no-check-certificate https://wget.xxxxxxx.com:10194/jdk/jdk-8u241-linux-x64.tar.gz
tar -xf jdk-8u241-linux-x64.tar.gz
cp -av /tmp/jdk1.8.0_241/bin/jstat /opt/scripts/
# Inside the "main" container, run the loop below: it copies jstat into each
# application container and uses it to inspect the JVM state.
# A JVM is reported when its uptime in days is not greater than the counter in
# column 16 (presumably the full-GC count on JDK 8 - confirm against the jstat
# header); normally there should be at most one per day, otherwise the JVM
# needs tuning.
# The commands from here to "exit" are typed into the interactive shell that
# docker exec opens.
docker exec -it main bash
# Select containers whose "docker ps" line contains 8080; the last column is
# the container name.
for i in $(docker ps|grep 8080|awk '{print $NF}')
do
docker cp /opt/scripts/jstat ${i}:/usr/local/java/java_1.8/bin/jstat
# In the target container: for every Java process except jps itself, take two
# jstat samples one second apart (-t adds the JVM uptime in seconds as the
# first column). Only the last output line is evaluated.
echo "for i in \$(jps|grep -v Jps|awk '{print \$1}');do jstat -gc -t \$i 1000 2; done"|docker exec -i ${i} bash|\
tail -1|awk -v name=${i} '{Day=int($1/24/3600);if(Day<=$16) printf "%s:\n%s\n\t%d天\t%d次\n\n",name,$0,Day,$16}'
done
exit
[TOC]