當然思路和腳本參考了網上的,但網上的那些有好多錯誤,以下爲本人經過更改調試後的。
如有疑問可以聯繫我 QQ: 279379936,一起改進優化。
Centos6下安裝easy_install
# yum install python-setuptools
安裝python 的docker模塊
# easy_install docker-py
sudo:sorry, you must have a tty to run sudo
使用不同賬戶執行腳本的時候,sudo經常會碰到 sudo: sorry, you must have a tty to run sudo 這個情況,其實修改一下sudo的配置就好了
# vim /etc/sudoers (最好用visudo命令)
註釋掉 Defaults requiretty 一行
#Defaults requiretty
意思就是sudo默認需要tty終端。註釋掉就可以在後臺執行了。
Zabbix客戶端的部署:
#vim /opt/zabbix/etc/zabbix_agentd.conf
#docker
UserParameter=docker_discovery[*],cat /opt/zabbix/script/docker_cons.txt //用來發現宿主機上存活的容器
UserParameter=docker_stats[*],/opt/zabbix/script/zabbix_monitor_docker.py $1 $2 //用來監控容器的各種指標,具體實現見後面的腳本,看不懂腳本的請路過。
UserParameter=docker.tomcat.discovery,cat /opt/zabbix/script/docker_tomcat.txt //用來發現容器啓動的tomcat服務
UserParameter=docker.tomcat.stats[*],/opt/zabbix/script/zabbix_monitor_docker.py $1 $2 $3 //用來監控容器中tomcat的端口
UserParameter=docker.nginx.discovery,cat /opt/zabbix/script/docker_nginx.txt //用來發現容器啓動的nginx服務
UserParameter=docker.nginx.stats[*],/opt/zabbix/script/zabbix_monitor_docker.py $1 $2 $3 //用來監控容器中nginx的端口
監控腳本1,用來監控容器的CPU 內存 網卡,服務端口
#cat /opt/zabbix/script/zabbix_monitor_docker.py
#!/usr/bin/env python
#-*- coding:utf-8 -*-
#email:[email protected]
import commands
import json
import os
import re
import subprocess
import sys
import time

from docker import Client
def _read_interface_bytes(container_name, interface='eth1'):
    """Return the cumulative RX/TX byte counters of *interface* in a container.

    Runs ``docker exec <container> ifconfig <interface>`` as an argument list
    (no shell) and extracts the counters with regular expressions instead of
    an awk pipeline.  Both the ``RX bytes:123`` and the
    ``RX packets 1  bytes 123`` ifconfig layouts are accepted.

    Args:
        container_name: Name or ID of the container to query.
        interface: Network interface inside the container.

    Returns:
        dict: ``{'rx': int, 'tx': int}``.

    Raises:
        ValueError: If no byte counters are found in the ifconfig output.
    """
    output = subprocess.Popen(
        ['docker', 'exec', container_name, 'ifconfig', interface],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    ).communicate()[0]
    if not isinstance(output, str):
        output = output.decode('utf-8', 'replace')
    # [^\n] keeps each match on one line, so "RX packets ..." lines that carry
    # no byte counter are skipped rather than bridged into the next line.
    received = re.search(r'RX[^\n]*?bytes[: ](\d+)', output)
    sent = re.search(r'TX[^\n]*?bytes[: ](\d+)', output)
    if received is None or sent is None:
        raise ValueError('no byte counters for %s in container %s'
                         % (interface, container_name))
    return {'rx': int(received.group(1)), 'tx': int(sent.group(1))}


def check_container_stats(container_name, collect_item):
    """Return a single monitoring value for one container.

    CPU and memory items are computed from two consecutive frames of the
    Docker stats stream, read through the module-level ``docker_client``.
    Network items are the difference between two ifconfig readings taken
    one second apart.

    Args:
        container_name: Name or ID of the container.
        collect_item: One of ``cpu_total_usage``, ``cpu_system_usage``,
            ``cpu_percent``, ``mem_usage``, ``mem_limit``, ``mem_percent``,
            ``network_rx_bytes`` or ``network_tx_bytes``.

    Returns:
        The requested value: an int for counters and deltas, a float rounded
        to two decimals for the ``*_percent`` items.

    Raises:
        ValueError: If ``collect_item`` is not one of the names above, or the
            network counters cannot be read.
    """
    network_items = {'network_rx_bytes': 'rx', 'network_tx_bytes': 'tx'}
    stats_items = ('cpu_total_usage', 'cpu_system_usage', 'cpu_percent',
                   'mem_usage', 'mem_limit', 'mem_percent')

    if collect_item in network_items:
        direction = network_items[collect_item]
        before = _read_interface_bytes(container_name)
        time.sleep(1)
        after = _read_interface_bytes(container_name)
        return after[direction] - before[direction]

    # Reject unknown items before opening the (slow) stats stream; the
    # original fell through to an UnboundLocalError instead.
    if collect_item not in stats_items:
        raise ValueError('unknown collect item: %s' % collect_item)

    stream = docker_client.stats(container_name)
    try:
        # The first frame is read and thrown away, as the original did
        # (presumably it is not yet a settled sample -- TODO confirm).
        next(stream)
        # Frames are JSON documents; json.loads replaces eval(), which would
        # execute the payload and cannot parse true/false/null.
        old_result = json.loads(next(stream))
        new_result = json.loads(next(stream))
    finally:
        stream.close()

    if collect_item == 'mem_usage':
        return new_result['memory_stats']['usage']
    if collect_item == 'mem_limit':
        return new_result['memory_stats']['limit']
    if collect_item == 'mem_percent':
        mem_limit = float(new_result['memory_stats']['limit'])
        if not mem_limit:
            return 0.0
        mem_usage = float(new_result['memory_stats']['usage'])
        return round(mem_usage / mem_limit * 100.0, 2)

    old_cpu = old_result['cpu_stats']
    new_cpu = new_result['cpu_stats']
    if collect_item == 'cpu_total_usage':
        return (new_cpu['cpu_usage']['total_usage']
                - old_cpu['cpu_usage']['total_usage'])
    if collect_item == 'cpu_system_usage':
        return new_cpu['system_cpu_usage'] - old_cpu['system_cpu_usage']

    # cpu_percent: container CPU delta relative to the host CPU delta,
    # scaled by the number of per-CPU entries.
    cpu_total_usage = (new_cpu['cpu_usage']['total_usage']
                       - old_cpu['cpu_usage']['total_usage'])
    cpu_system_usage = new_cpu['system_cpu_usage'] - old_cpu['system_cpu_usage']
    if not cpu_system_usage:
        # Identical host counters in both frames would divide by zero.
        return 0.0
    cpu_num = len(old_cpu['cpu_usage']['percpu_usage'])
    return round(float(cpu_total_usage) / float(cpu_system_usage)
                 * cpu_num * 100.0, 2)
if __name__ == "__main__":
    # check_container_stats() reads this module-level client.
    docker_client = Client(base_url='unix://var/run/docker.sock', version='1.19')
    if len(sys.argv) == 3:
        # Usage: zabbix_monitor_docker.py <container> <collect_item>
        container_name = sys.argv[1]
        collect_item = sys.argv[2]
        print(check_container_stats(container_name, collect_item))
    elif len(sys.argv) == 4 and sys.argv[2] == 'port':
        # Usage: zabbix_monitor_docker.py <container> port <port>
        # Prints 0 when something inside the container listens on <port>,
        # 1 otherwise (the same values the original grep exit status gave).
        container_name = sys.argv[1]
        collect_item = int(sys.argv[3])
        # stderr is captured and dropped so docker error text cannot end up
        # in the value handed back to Zabbix.
        listing = subprocess.Popen(
            ['/usr/bin/docker', 'exec', container_name, 'netstat', '-ntpul'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        ).communicate()[0]
        if not isinstance(listing, str):
            listing = listing.decode('utf-8', 'replace')
        # Match ":<port>" followed by whitespace so that 80 no longer matches
        # 8080 or a PID that happens to contain the digits.
        if re.search(r':%d\s' % collect_item, listing):
            print('0')
        else:
            print('1')
    else:
        print('1')
說明:上面腳本爲通過python的docker模塊去抓取數據,由於各種原因,有些機器無法安裝python模塊,可通過下面腳本實現:
# cat /opt/zabbix/script/zabbix_monitor_docker.py
#!/usr/bin/env python
#-*- coding:utf-8 -*-
#email:[email protected]
import sys
import subprocess
import time
import commands
import re
def _container_cgroup_dir(cgroup_dir, container_name):
    """Return the sub-directory of *cgroup_dir* that belongs to a container.

    The container is resolved to its full ID with ``docker inspect`` (an
    exact lookup, unlike the earlier ``docker ps | grep <name>`` which also
    matched other containers or images containing the name).  The directory
    listing is then searched for an entry containing that ID.

    Raises:
        ValueError: If the container cannot be resolved or no matching
            directory exists.
    """
    import os

    proc = subprocess.Popen(
        ['sudo', '/usr/bin/docker', 'inspect', '--format', '{{.Id}}',
         container_name],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )
    output = proc.communicate()[0]
    if not isinstance(output, str):
        output = output.decode('utf-8', 'replace')
    container_id = output.strip()
    if proc.returncode != 0 or not container_id:
        raise ValueError('cannot resolve container: %s' % container_name)

    matches = [entry for entry in os.listdir(cgroup_dir)
               if container_id in entry]
    if not matches:
        raise ValueError('no cgroup directory for container %s under %s'
                         % (container_name, cgroup_dir))
    return cgroup_dir + '/' + matches[0]


def get_memory_container_dir(memory_dir, container_name):
    """Return the memory cgroup directory of *container_name*.

    Args:
        memory_dir: Directory holding one sub-directory per container.
        container_name: Name or ID of the container.

    Returns:
        str: ``memory_dir`` joined with the container's directory name.

    Raises:
        ValueError: If the container or its directory cannot be found.
    """
    return _container_cgroup_dir(memory_dir, container_name)


def get_cpu_container_dir(cpu_dir, container_name):
    """Return the CPU accounting cgroup directory of *container_name*.

    Args:
        cpu_dir: Directory holding one sub-directory per container.
        container_name: Name or ID of the container.

    Returns:
        str: ``cpu_dir`` joined with the container's directory name.

    Raises:
        ValueError: If the container or its directory cannot be found.
    """
    return _container_cgroup_dir(cpu_dir, container_name)
def get_cpu_info(container_name):
    """Fetch one stats snapshot for a container from the Docker socket.

    Sends ``GET /containers/<name>/stats?stream=false`` to
    ``/var/run/docker.sock`` through ``sudo /usr/bin/nc -U`` and decodes the
    JSON body of the response.

    Args:
        container_name: Name or ID of the container.

    Returns:
        dict: The decoded stats document (callers read its ``read`` and
        ``cpu_stats`` keys).

    Raises:
        ValueError: If the response contains no JSON object.
    """
    import json

    request = ('GET /containers/%s/stats?stream=false HTTP/1.1\r\n\r\n'
               % container_name)
    # The request is written to nc's stdin directly, which replaces the
    # "echo -ne ... | nc" shell pipeline and keeps the name out of a shell.
    proc = subprocess.Popen(
        ['sudo', '/usr/bin/nc', '-U', '/var/run/docker.sock'],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )
    response = proc.communicate(request.encode('ascii'))[0]
    if not isinstance(response, str):
        response = response.decode('utf-8', 'replace')

    # Skip the HTTP status line and headers; the body is the first line that
    # starts a JSON object.  json.loads replaces eval(), which would execute
    # the response and cannot parse true/false/null.
    for line in response.splitlines():
        line = line.strip()
        if line.startswith('{'):
            return json.loads(line)
    raise ValueError('no stats returned for container: %s' % container_name)
def _sample_interval(old_result, new_result):
    """Return the whole seconds between two stats samples (never zero).

    Only the seconds field of each sample's ``read`` timestamp is compared,
    as before.  Assumes an ISO-like ``...THH:MM:SS...`` value, which is what
    the earlier split on ``:`` and ``.`` relied on as well.
    """
    seconds = []
    for sample in (old_result, new_result):
        stamp = re.search(r'T\d+:\d+:(\d+)', sample['read'])
        if stamp is None:
            raise ValueError('unexpected "read" timestamp: %r' % sample['read'])
        seconds.append(int(stamp.group(1)))
    # Wrap at 60 so a minute rollover (e.g. 59 -> 01) yields 2, not a
    # negative number.
    interval = (seconds[1] - seconds[0]) % 60
    # Identical seconds give no usable interval; keep the constant 2 that
    # the original used as its fallback.
    return interval or 2


def _read_memory_rss(memory_container_dir):
    """Return the ``rss`` value from the container's ``memory.stat`` file."""
    with open(memory_container_dir + '/memory.stat') as stat_file:
        for line in stat_file:
            fields = line.split()
            # Exact key match: a prefix test would also pick up "rss_huge".
            if len(fields) == 2 and fields[0] == 'rss':
                return int(fields[1])
    raise ValueError('no rss entry in %s/memory.stat' % memory_container_dir)


def _read_memory_limit(memory_container_dir):
    """Return the value stored in the container's ``memory.limit_in_bytes``."""
    with open(memory_container_dir + '/memory.limit_in_bytes') as limit_file:
        return int(limit_file.read().strip())


def _read_interface_bytes(container_name, interface='eth1'):
    """Return ``{'rx': int, 'tx': int}`` byte counters of a container interface.

    Runs ``sudo /usr/bin/docker exec <container> ifconfig <interface>``
    without a shell and parses the output in Python.  Both the
    ``RX bytes:123`` and the ``RX packets 1  bytes 123`` layouts are accepted.

    Raises:
        ValueError: If no byte counters are found in the ifconfig output.
    """
    output = subprocess.Popen(
        ['sudo', '/usr/bin/docker', 'exec', container_name,
         'ifconfig', interface],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    ).communicate()[0]
    if not isinstance(output, str):
        output = output.decode('utf-8', 'replace')
    # [^\n] keeps each match on one line, so "RX packets ..." lines without
    # a byte counter are skipped.
    received = re.search(r'RX[^\n]*?bytes[: ](\d+)', output)
    sent = re.search(r'TX[^\n]*?bytes[: ](\d+)', output)
    if received is None or sent is None:
        raise ValueError('no byte counters for %s in container %s'
                         % (interface, container_name))
    return {'rx': int(received.group(1)), 'tx': int(sent.group(1))}


def check_container_stats(container_name, collect_item):
    """Return a single monitoring value for one container.

    CPU items come from two consecutive ``get_cpu_info()`` snapshots, memory
    items from the container's cgroup files below the module-level
    ``memory_dir``, and network items from two ifconfig readings taken one
    second apart.

    Args:
        container_name: Name or ID of the container.
        collect_item: One of ``cpu_total_usage``, ``cpu_system_usage``,
            ``cpu_percent``, ``mem_usage``, ``mem_limit``, ``mem_percent``,
            ``network_rx_bytes`` or ``network_tx_bytes``.

    Returns:
        An int for counters and per-second deltas, a float rounded to two
        decimals for the ``*_percent`` items.

    Raises:
        ValueError: If ``collect_item`` is unknown or a source cannot be
            parsed.
    """
    if collect_item in ('cpu_total_usage', 'cpu_system_usage', 'cpu_percent'):
        old_result = get_cpu_info(container_name)
        new_result = get_cpu_info(container_name)
        old_cpu = old_result['cpu_stats']
        new_cpu = new_result['cpu_stats']
        cpu_total_usage = (new_cpu['cpu_usage']['total_usage']
                           - old_cpu['cpu_usage']['total_usage'])
        cpu_system_usage = (new_cpu['system_cpu_usage']
                            - old_cpu['system_cpu_usage'])
        if collect_item == 'cpu_percent':
            # Both deltas cover the same interval, so it cancels out of the
            # ratio and does not need to be computed here.
            if not cpu_system_usage:
                return 0.0
            cpu_num = len(old_cpu['cpu_usage']['percpu_usage'])
            return round(float(cpu_total_usage) / float(cpu_system_usage)
                         * cpu_num * 100.0, 2)
        time_interval = _sample_interval(old_result, new_result)
        if collect_item == 'cpu_total_usage':
            return cpu_total_usage // time_interval
        return cpu_system_usage // time_interval

    if collect_item in ('mem_usage', 'mem_limit', 'mem_percent'):
        memory_container_dir = get_memory_container_dir(memory_dir,
                                                        container_name)
        if collect_item == 'mem_usage':
            return _read_memory_rss(memory_container_dir)
        if collect_item == 'mem_limit':
            return _read_memory_limit(memory_container_dir)
        mem_limit = float(_read_memory_limit(memory_container_dir))
        if not mem_limit:
            return 0.0
        mem_usage = float(_read_memory_rss(memory_container_dir))
        return round(mem_usage / mem_limit * 100.0, 2)

    if collect_item in ('network_rx_bytes', 'network_tx_bytes'):
        direction = 'rx' if collect_item == 'network_rx_bytes' else 'tx'
        before = _read_interface_bytes(container_name)
        time.sleep(1)
        after = _read_interface_bytes(container_name)
        return after[direction] - before[direction]

    # The original reached "return result" with result unbound here.
    raise ValueError('unknown collect item: %s' % collect_item)
if __name__ == "__main__":
    # cgroup roots; check_container_stats() reads memory_dir as a global.
    cpu_dir = "/cgroup/cpuacct/docker"
    memory_dir = "/cgroup/memory/docker"
    if len(sys.argv) == 3:
        # Usage: zabbix_monitor_docker.py <container> <collect_item>
        container_name = sys.argv[1]
        collect_item = sys.argv[2]
        print(check_container_stats(container_name, collect_item))
    elif len(sys.argv) == 4 and sys.argv[2] == 'port':
        # Usage: zabbix_monitor_docker.py <container> port <port>
        # Prints 0 when something inside the container listens on <port>,
        # 1 otherwise (the same values the original grep exit status gave).
        container_name = sys.argv[1]
        collect_item = int(sys.argv[3])
        # stderr is captured and dropped so sudo/docker error text cannot
        # end up in the value handed back to Zabbix.
        listing = subprocess.Popen(
            ['sudo', '/usr/bin/docker', 'exec', container_name,
             'netstat', '-ntpul'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        ).communicate()[0]
        if not isinstance(listing, str):
            listing = listing.decode('utf-8', 'replace')
        # Match ":<port>" followed by whitespace so that 80 no longer matches
        # 8080 or a PID that happens to contain the digits.
        if re.search(r':%d\s' % collect_item, listing):
            print('0')
        else:
            print('1')
    else:
        print('1')
腳本2,用來發現容器名
# cat discovery_cons.py
#!/usr/bin/env python
# Felix Shang
#QQ: 279379936
import commands
import sys
def docker_s():
    """Return the names of the running containers.

    Runs ``sudo /usr/bin/docker ps`` and takes the last column of every row
    except the ``CONTAINER ID`` header.

    Returns:
        list of str with one name per container, or the integer ``0`` when
        no container is listed (callers compare the result against ``0``).
    """
    # commands.getoutput() merges stderr into the result, so stderr is
    # discarded here; otherwise the words of a sudo/docker error message
    # would be reported as container names.
    listing = commands.getoutput("sudo /usr/bin/docker ps 2>/dev/null")
    names = []
    for row in listing.splitlines():
        columns = row.split()
        if not columns or 'CONTAINER ID' in row:
            continue
        names.append(columns[-1])
    if names:
        return names
    return 0
if __name__ == "__main__":
    if len(sys.argv) == 2 and sys.argv[1] == 'docker':
        infos = docker_s()
        # docker_s() returns 0 when nothing is running.  An empty "data"
        # list is still printed in that case so the consumer always gets a
        # valid JSON document.
        names = infos if infos != 0 else []
        # One entry per line: the service discovery script extracts the
        # names from this output line by line.
        entries = ['\t\t{"{#CONTAINERNAME}":"%s"}' % con for con in names]
        print('{')
        print('\t"data":[')
        if entries:
            print(',\n'.join(entries))
        print('\t]')
        print('}')
    else:
        sys.stderr.write('usage: %s docker\n' % sys.argv[0])
        sys.exit(1)
腳本3,用來發現容器的服務(tomcat nginx),之前腳本2和腳本3是一個腳本,發現容器時出現好多問題。
# cat /opt/zabbix/script/discovery_docker_service.py
#!/usr/bin/env python
# Felix Shang
#QQ: 279379936
import commands
import sys
def docker_s(cons_file='/opt/zabbix/script/docker_cons.txt'):
    """Return the container names recorded in the container discovery file.

    Every line containing ``CONTAINERNAME`` is split on double quotes and the
    fourth piece is taken as the name, i.e. the value in
    ``{"{#CONTAINERNAME}":"<name>"}``.  The file is read directly instead of
    through a ``cat | grep | awk | tr`` pipeline; this also stops names that
    contain "grep" from being filtered out by the old ``grep -v grep``.

    Args:
        cons_file: Path of the discovery file to read.

    Returns:
        list of str with the names in file order, or the integer ``0`` when
        the file is missing, unreadable or holds no names (callers compare
        the result against ``0``).
    """
    try:
        with open(cons_file) as handle:
            lines = handle.readlines()
    except IOError:
        return 0

    names = []
    for line in lines:
        if 'CONTAINERNAME' not in line:
            continue
        pieces = line.split('"')
        if len(pieces) > 3:
            # split() drops empty values, like the original cons.split().
            names.extend(pieces[3].split())
    if names:
        return names
    return 0
def tomcat_s():
    """Map each discovered container to the connector ports of its Tomcats.

    For every container returned by ``docker_s()`` the process list is
    checked for a java/tomcat process.  If one exists, the text between the
    first ``=`` of each tomcat process line and the word ``logging`` is taken
    as a configuration directory (presumably from
    ``-Djava.util.logging.config.file=<conf dir>/logging.properties`` --
    verify against the Tomcat start script in use).  ``server.xml`` in that
    directory is then searched for ``Connector`` lines with a ``port=``
    attribute, ignoring lines that mention ``shutdown`` or ``AJP``.

    Returns:
        dict: container name -> list of port strings (empty when the
        container runs no Tomcat).

    Exits the process with status 2 when ``docker_s()`` reports no
    containers.
    """
    cons = docker_s()
    if cons == 0:
        sys.exit(2)

    cons_d = {}
    for con in cons:
        port_list = []
        stat = commands.getoutput(
            "sudo /usr/bin/docker exec %s ps -ef|grep java|grep tomcat"
            "|grep -v grep>/dev/null;echo $?" % con)
        # The exit status is the last token printed; anything in front of it
        # is error text merged in by getoutput(), which used to crash int().
        if stat.split()[-1:] == ['0']:
            tomcat_config_dirs = commands.getoutput(
                "sudo /usr/bin/docker exec %s ps -ef | grep tomcat "
                "|grep -v grep | awk -F= '{print $2}' "
                "| awk -F'logging' '{print $1}'" % con).split()
            for tomcat_config_dir in tomcat_config_dirs:
                tomcat_config_file = tomcat_config_dir + 'server.xml'
                found = commands.getoutput(
                    """sudo /usr/bin/docker exec %s grep "port=" %s"""
                    """|grep -v "shutdown"|grep -v "AJP"|grep "Connector\""""
                    """|awk -F= '{print $2}'|awk '{print $1}'"""
                    % (con, tomcat_config_file))
                # A server.xml may declare several connectors.  Handle each
                # token separately and keep only numeric ones, so quotes and
                # stray error text never end up in the port list.
                for token in found.split():
                    port = token.strip('"')
                    if port.isdigit():
                        port_list.append(port)
        cons_d[con] = port_list
    return cons_d
def nginx_s():
    """Map each discovered container to the ports its nginx listens on.

    For every container returned by ``docker_s()`` the process list is
    checked for an nginx process.  If one exists, the local ports of the
    nginx rows in ``netstat -ntpul`` are collected.  Rows containing
    ``40080`` are skipped, as in the original; NOTE(review): the reason for
    excluding that port is not visible here -- confirm it is still wanted.

    Returns:
        dict: container name -> list of port strings (empty when the
        container runs no nginx).

    Exits the process with status 2 when ``docker_s()`` reports no
    containers.
    """
    cons = docker_s()
    if cons == 0:
        sys.exit(2)

    cons_d = {}
    for con in cons:
        port_list = []
        stat = commands.getoutput(
            "sudo /usr/bin/docker exec %s ps -ef|grep nginx"
            "|grep -v grep>/dev/null;echo $?" % con)
        # The exit status is the last token printed; anything in front of it
        # is error text merged in by getoutput(), which used to crash int().
        if stat.split()[-1:] == ['0']:
            found = commands.getoutput(
                "sudo /usr/bin/docker exec %s netstat -ntpul|grep nginx"
                "|grep -v 40080|awk '{print $4}'|awk -F: '{print $NF}'" % con)
            # Keep numeric tokens only, so stray error text is never
            # reported as a port.
            port_list = [token for token in found.split() if token.isdigit()]
        cons_d[con] = port_list
    return cons_d
if __name__ == "__main__":
    # Service name -> (discovery function, macro that carries the port).
    # The two services differ only in these values, so one code path prints
    # both documents.
    discoverers = {
        'tomcat': (tomcat_s, '{#CON_TOMCAT_PORT}'),
        'nginx': (nginx_s, '{#CON_NGINX_PORT}'),
    }
    if len(sys.argv) == 2 and sys.argv[1] in discoverers:
        discover, port_macro = discoverers[sys.argv[1]]
        infos = discover()
        # One entry per (container, port) pair; containers without ports
        # contribute nothing.
        port_infos = []
        for con_info in infos:
            for port in infos[con_info]:
                port_infos.append(
                    '\t\t{"{#CONTAINERNAME}":"%s","%s":"%s"}'
                    % (con_info, port_macro, port))
        print('{')
        print('\t"data":[')
        if port_infos:
            # Joining puts a comma after every entry except the last one.
            print(',\n'.join(port_infos))
        print('\t]')
        print('}')
    else:
        sys.stderr.write('usage: %s tomcat|nginx\n' % sys.argv[0])
        sys.exit(1)
#vim /etc/sudoers //zabbix_agent是通過zabbix用戶執行,通過sudo提權讓zabbix用戶對腳本有執行權限。
zabbix ALL=(root) NOPASSWD:/usr/bin/docker,/sbin/fdisk,/usr/sbin/dmidecode,/usr/bin/nc
Zabbix服務端的配置:
導入模板:Template docker, 宿主機關聯此模板即可。
報錯:
Server獲取值報錯:[ZBX_NOTSUPPORTED][Timeout while executing a shell script.]
# vim zabbix_agentd.conf
# 設置超時時間
Timeout=30