zabbix監控docker

當然思路和腳本參考了網上的,但網上的那些有好多錯誤,以下爲本人經過更改調試後的。

如有疑問可以聯繫我 QQ: 279379936,一起改進優化。


Centos6下安裝easy_install

# yum install python-setuptools

 

安裝python docker模塊

# easy_install docker-py

 

sudo:sorry, you must have a tty to run sudo

使用不同賬戶執行腳本時,sudo 經常會報 "sudo: sorry, you must have a tty to run sudo" 這個錯誤,其實修改一下 sudo 的配置就好了。

# vim /etc/sudoers (最好用visudo命令)

註釋掉 Defaults requiretty 一行

#Defaults requiretty

意思就是sudo默認需要tty終端。註釋掉就可以在後臺執行了。

 

Zabbix客戶端的部署:

#vim /opt/zabbix/etc/zabbix_agentd.conf

 

#docker

UserParameter=docker_discovery[*],cat /opt/zabbix/script/docker_cons.txt    //用來發現宿主機上存活的容器

UserParameter=docker_stats[*],/opt/zabbix/script/zabbix_monitor_docker.py $1 $2 //用來監控容器的各種指標,後面的腳本會具體體現,看不懂腳本的請路過。

UserParameter=docker.tomcat.discovery,cat /opt/zabbix/script/docker_tomcat.txt  //用來發現容器啓動的tomcat服務

UserParameter=docker.tomcat.stats[*],/opt/zabbix/script/zabbix_monitor_docker.py $1 $2 $3  //用來監控容器中tomcat的端口

UserParameter=docker.nginx.discovery,cat /opt/zabbix/script/docker_nginx.txt  //用來發現容器啓動的nginx服務

UserParameter=docker.nginx.stats[*],/opt/zabbix/script/zabbix_monitor_docker.py $1 $2 $3 //用來監控容器中nginx的端口

 

監控腳本1,用來監控容器的CPU 內存 網卡,服務端口

#cat /opt/zabbix/script/zabbix_monitor_docker.py

 

#!/usr/bin/env python

#-*- coding:utf-8 -*-

#email:[email protected]

 

from docker import Client

import sys

import subprocess

import os

import time

import commands

 

# Items answered from the Docker stats stream.
_STATS_ITEMS = (
    'cpu_total_usage', 'cpu_system_usage', 'cpu_percent',
    'mem_usage', 'mem_limit', 'mem_percent',
)
# Items answered from ifconfig inside the container -> index into (rx, tx).
_NETWORK_ITEMS = {'network_rx_bytes': 0, 'network_tx_bytes': 1}


def _read_stats_pair(container_name):
    """Return two consecutive decoded samples ``(older, newer)`` for a container.

    Reads from the module-level ``docker_client`` created in the ``__main__``
    section. Each item yielded by ``docker_client.stats()`` is decoded as JSON.
    """
    # Function-scope import: the script header does not import json.
    import json

    stream = docker_client.stats(container_name)
    try:
        # The first sample is skipped, exactly as the script has always done
        # (presumably because it does not cover a full interval -- confirm).
        next(stream)
        # json.loads instead of eval(): eval executes arbitrary text and
        # fails on the JSON literals true/false/null.
        older = json.loads(next(stream))
        newer = json.loads(next(stream))
    finally:
        stream.close()
    return older, newer


def _read_eth1_counters(container_name):
    """Return ``(rx_bytes, tx_bytes)`` of eth1 as reported inside the container.

    The pipeline expects the classic ifconfig line
    ``RX bytes:N (x KiB)  TX bytes:M (y KiB)``: after splitting on ':' the
    byte counters are whitespace fields 1 and 6. The previous
    ``awk -F '('`` / ``awk -F ')'`` stages returned the human-readable size
    (e.g. ``1.2``) instead of the TX counter.

    NOTE(review): container_name is interpolated into a shell command, so it
    must come from a trusted source.

    Raises:
        ValueError: if the command printed fewer than two fields.
    """
    command = (
        "docker exec %s ifconfig eth1|grep bytes"
        "|awk -F':' '{print $2,$3}'|awk '{print $1,$6}'"
    ) % container_name
    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
    fields = process.communicate()[0].split()
    if len(fields) < 2:
        raise ValueError('no eth1 byte counters for container %r'
                         % (container_name,))
    return int(fields[0]), int(fields[1])


def check_container_stats(container_name, collect_item):
    """Return one metric of a container.

    Args:
        container_name: name of the container to inspect.
        collect_item: one of
            ``cpu_total_usage``  - container CPU counter delta between two samples,
            ``cpu_system_usage`` - system CPU counter delta between two samples,
            ``cpu_percent``      - total delta / system delta * CPU count * 100,
            ``mem_usage``, ``mem_limit`` - values from ``memory_stats``,
            ``mem_percent``      - usage / limit * 100,
            ``network_rx_bytes``, ``network_tx_bytes`` - eth1 byte counter
            growth over one second.

    Returns:
        An int for counters and deltas, a float rounded to two decimals for
        the ``*_percent`` items.

    Raises:
        ValueError: if ``collect_item`` is not one of the items above
            (previously this surfaced as an unbound ``result``).
    """
    if collect_item in _NETWORK_ITEMS:
        # Network items do not need the stats stream at all.
        index = _NETWORK_ITEMS[collect_item]
        before = _read_eth1_counters(container_name)
        time.sleep(1)
        after = _read_eth1_counters(container_name)
        return after[index] - before[index]

    if collect_item not in _STATS_ITEMS:
        raise ValueError('unsupported collect_item: %r' % (collect_item,))

    older, newer = _read_stats_pair(container_name)

    if collect_item == 'cpu_total_usage':
        return (newer['cpu_stats']['cpu_usage']['total_usage']
                - older['cpu_stats']['cpu_usage']['total_usage'])

    if collect_item == 'cpu_system_usage':
        return (newer['cpu_stats']['system_cpu_usage']
                - older['cpu_stats']['system_cpu_usage'])

    if collect_item == 'cpu_percent':
        total_delta = (newer['cpu_stats']['cpu_usage']['total_usage']
                       - older['cpu_stats']['cpu_usage']['total_usage'])
        system_delta = (newer['cpu_stats']['system_cpu_usage']
                        - older['cpu_stats']['system_cpu_usage'])
        cpu_count = len(older['cpu_stats']['cpu_usage']['percpu_usage'])
        if system_delta == 0:
            # Identical system counters would otherwise divide by zero.
            return 0.0
        return round(
            float(total_delta) / float(system_delta) * cpu_count * 100.0, 2)

    if collect_item == 'mem_usage':
        return newer['memory_stats']['usage']

    if collect_item == 'mem_limit':
        return newer['memory_stats']['limit']

    # Only 'mem_percent' is left.
    usage = newer['memory_stats']['usage']
    limit = newer['memory_stats']['limit']
    if not limit:
        return 0.0
    return round(float(usage) / float(limit) * 100.0, 2)

if __name__ == "__main__":
    # Usage (as wired up by the Zabbix UserParameter entries):
    #   zabbix_monitor_docker.py <container> <item>
    #   zabbix_monitor_docker.py <container> port <port>
    # Any other argument shape prints '1'.
    docker_client = Client(base_url='unix://var/run/docker.sock', version='1.19')

    if len(sys.argv) == 3:
        container_name = sys.argv[1]
        collect_item = sys.argv[2]
        print(check_container_stats(container_name, collect_item))

    elif len(sys.argv) == 4 and sys.argv[2] == 'port':
        container_name = sys.argv[1]
        collect_item = int(sys.argv[3])
        # Prints grep's exit status: 0 when a matching netstat line exists.
        # The pattern is anchored as ':<port> ' so that port 80 no longer
        # matches 8080 or a PID that happens to contain the digits.
        check_stat = commands.getoutput(
            "/usr/bin/docker exec %s netstat -ntpul|grep ':%s ' > /dev/null;echo $?"
            % (container_name, collect_item))
        print(check_stat)

    else:
        print('1')

說明:上面腳本是通過 python 的 docker 模塊去抓取數據,由於各種原因,有些機器無法安裝 python 模塊,可通過下面腳本實現:

# cat /opt/zabbix/script/zabbix_monitor_docker.py

#!/usr/bin/env python

#-*- coding:utf-8 -*-

#email:[email protected]

 

import sys

import subprocess

import time

import commands

import re

 

def _find_container_cgroup_dir(cgroup_dir, container_name):
    """Return ``cgroup_dir/<full container id>`` for a running container.

    The ID from the first column of ``docker ps`` is looked up in the
    ``ls -al`` listing of ``cgroup_dir`` to obtain the full directory name.

    NOTE(review): container_name is interpolated into a shell command, so it
    must come from a trusted source.

    Raises:
        ValueError: if ``docker ps`` shows no container with that name.
    """
    # Exact match on the last column (NAMES) instead of a substring grep over
    # the whole line, so "web" no longer also selects "web2".
    short_id = commands.getoutput(
        "sudo /usr/bin/docker ps|awk -v name='%s' '$NF == name {print $1}'"
        % container_name).strip()
    if not short_id:
        # An empty ID would make the grep below match every directory entry.
        raise ValueError('container %r is not running' % (container_name,))
    full_id = commands.getoutput(
        "ls -al %s|grep '%s'|grep -v grep|awk '{print $NF}'"
        % (cgroup_dir, short_id))
    return cgroup_dir + '/' + full_id


def get_memory_container_dir(memory_dir, container_name):
    """Return the container's directory below the memory cgroup ``memory_dir``."""
    return _find_container_cgroup_dir(memory_dir, container_name)


def get_cpu_container_dir(cpu_dir, container_name):
    """Return the container's directory below the cpuacct cgroup ``cpu_dir``."""
    return _find_container_cgroup_dir(cpu_dir, container_name)

def get_cpu_info(container_name):
    """Fetch one stats sample for a container from the Docker unix socket.

    Sends ``GET /containers/<name>/stats?stream=false`` through ``nc -U`` and
    keeps the response line containing ``read``, which is decoded as JSON.

    NOTE(review): container_name is interpolated into a shell command, so it
    must come from a trusted source.

    Returns:
        The decoded sample as a dict.

    Raises:
        ValueError: if the selected line is not valid JSON (for example when
            the request failed and nothing matched).
    """
    # Function-scope import: the script header does not import json.
    import json

    # "\r\n" is deliberately not a raw string: the real CR/LF characters are
    # handed to echo inside the double quotes.
    request = (
        'echo -ne "GET /containers/%s/stats?stream=false HTTP/1.1\r\n\r\n"'
        '|sudo /usr/bin/nc -U /var/run/docker.sock|grep read'
    ) % container_name
    raw = commands.getoutput(request)
    # json.loads instead of eval(): eval executes arbitrary text and fails on
    # the JSON literals true/false/null.
    return json.loads(raw)

 

_CPU_ITEMS = ('cpu_total_usage', 'cpu_system_usage', 'cpu_percent')
_MEMORY_ITEMS = ('mem_usage', 'mem_limit', 'mem_percent')
# Network item -> index into the (rx, tx) tuple.
_NETWORK_ITEMS = {'network_rx_bytes': 0, 'network_tx_bytes': 1}


def _seconds_between_samples(old_sample, new_sample):
    """Return the whole seconds between the ``read`` stamps of two samples.

    Only the seconds field (third piece when splitting on ':' or '.') is
    compared, as before. The difference is taken modulo 60 so a minute
    rollover (59 -> 01) yields 2 instead of a guess; this assumes the samples
    are less than a minute apart. A difference of 0 keeps the historical
    fallback of 2 seconds, which also avoids dividing by zero.
    """
    old_second = int(re.split(r'[:.]', old_sample['read'])[2])
    new_second = int(re.split(r'[:.]', new_sample['read'])[2])
    elapsed = (new_second - old_second) % 60
    if elapsed == 0:
        return 2
    return elapsed


def _read_cgroup_rss(memory_container_dir):
    """Return the ``rss`` value from the container's memory.stat, as text."""
    # '^rss ' (with the space) so the 'rss_huge' line found on newer kernels
    # is not picked up as a second value.
    return commands.getoutput(
        "cat %s/memory.stat|grep '^rss '|grep -v grep|awk '{print $NF}'"
        % memory_container_dir)


def _read_cgroup_limit(memory_container_dir):
    """Return the content of the container's memory.limit_in_bytes, as text."""
    return commands.getoutput(
        "cat %s/memory.limit_in_bytes" % memory_container_dir)


def _read_eth1_counters(container_name):
    """Return ``(rx_bytes, tx_bytes)`` of eth1 as reported inside the container.

    Expects the classic ifconfig line ``RX bytes:N (..)  TX bytes:M (..)``:
    after splitting on ':' the counters are whitespace fields 1 and 6.

    Raises:
        ValueError: if the command printed fewer than two fields.
    """
    command = (
        "sudo /usr/bin/docker exec %s ifconfig eth1|grep bytes"
        "|awk -F':' '{print $2,$3}'|awk '{print $1,$6}'"
    ) % container_name
    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
    fields = process.communicate()[0].split()
    if len(fields) < 2:
        raise ValueError('no eth1 byte counters for container %r'
                         % (container_name,))
    return int(fields[0]), int(fields[1])


def check_container_stats(container_name, collect_item):
    """Return one metric of a container without using the docker Python module.

    CPU items come from two ``get_cpu_info()`` samples, memory items from the
    container's directory below the module-level ``memory_dir``, network
    items from ``ifconfig eth1`` inside the container.

    Args:
        container_name: name of the container to inspect.
        collect_item: one of
            ``cpu_total_usage``, ``cpu_system_usage`` - counter delta per
            second between the two samples (integer division),
            ``cpu_percent`` - total delta / system delta * CPU count * 100,
            ``mem_usage``   - rss from memory.stat (text),
            ``mem_limit``   - memory.limit_in_bytes (text),
            ``mem_percent`` - rss / limit * 100,
            ``network_rx_bytes``, ``network_tx_bytes`` - eth1 byte counter
            growth over one second.

    Returns:
        int, float or str depending on the item, ready to be printed.

    Raises:
        ValueError: if ``collect_item`` is not one of the items above
            (previously this surfaced as an unbound ``result``).

    NOTE(review): container_name ends up in shell commands, so it must come
    from a trusted source.
    """
    if collect_item in _CPU_ITEMS:
        old_sample = get_cpu_info(container_name)
        new_sample = get_cpu_info(container_name)
        old_cpu = old_sample['cpu_stats']
        new_cpu = new_sample['cpu_stats']

        if collect_item == 'cpu_total_usage':
            delta = (new_cpu['cpu_usage']['total_usage']
                     - old_cpu['cpu_usage']['total_usage'])
            # '//' keeps the Python 2 integer division of the original.
            return delta // _seconds_between_samples(old_sample, new_sample)

        if collect_item == 'cpu_system_usage':
            delta = new_cpu['system_cpu_usage'] - old_cpu['system_cpu_usage']
            return delta // _seconds_between_samples(old_sample, new_sample)

        # cpu_percent: the sampling interval cancels out of the ratio, so the
        # raw deltas are used directly.
        total_delta = (new_cpu['cpu_usage']['total_usage']
                       - old_cpu['cpu_usage']['total_usage'])
        system_delta = new_cpu['system_cpu_usage'] - old_cpu['system_cpu_usage']
        cpu_count = len(old_cpu['cpu_usage']['percpu_usage'])
        if system_delta == 0:
            return 0.0
        return round(
            float(total_delta) / float(system_delta) * cpu_count * 100.0, 2)

    if collect_item in _MEMORY_ITEMS:
        memory_container_dir = get_memory_container_dir(memory_dir, container_name)
        if collect_item == 'mem_usage':
            return _read_cgroup_rss(memory_container_dir)
        if collect_item == 'mem_limit':
            return _read_cgroup_limit(memory_container_dir)
        mem_usage = float(_read_cgroup_rss(memory_container_dir))
        mem_limit = float(_read_cgroup_limit(memory_container_dir))
        if mem_limit == 0:
            return 0.0
        return round(mem_usage / mem_limit * 100.0, 2)

    if collect_item in _NETWORK_ITEMS:
        index = _NETWORK_ITEMS[collect_item]
        before = _read_eth1_counters(container_name)
        time.sleep(1)
        after = _read_eth1_counters(container_name)
        return after[index] - before[index]

    raise ValueError('unsupported collect_item: %r' % (collect_item,))

 

if __name__ == "__main__":
    # Usage (as wired up by the Zabbix UserParameter entries):
    #   zabbix_monitor_docker.py <container> <item>
    #   zabbix_monitor_docker.py <container> port <port>
    # Any other argument shape prints '1'.

    # cgroup mount points; memory_dir is read by check_container_stats().
    cpu_dir = "/cgroup/cpuacct/docker"
    memory_dir = "/cgroup/memory/docker"

    if len(sys.argv) == 3:
        container_name = sys.argv[1]
        collect_item = sys.argv[2]
        print(check_container_stats(container_name, collect_item))

    elif len(sys.argv) == 4 and sys.argv[2] == 'port':
        container_name = sys.argv[1]
        collect_item = int(sys.argv[3])
        # Prints grep's exit status: 0 when a matching netstat line exists.
        # The pattern is anchored as ':<port> ' so that port 80 no longer
        # matches 8080 or a PID that happens to contain the digits.
        check_stat = commands.getoutput(
            "sudo /usr/bin/docker exec %s netstat -ntpul|grep ':%s ' > /dev/null;echo $?"
            % (container_name, collect_item))
        print(check_stat)

    else:
        print('1')

 

腳本2,用來發現容器名

# cat discovery_cons.py

#!/usr/bin/env python

 

# Felix Shang

#QQ: 279379936

import commands

import sys

 

def docker_s():
    """Return the names of the running containers.

    Takes the last column of every ``docker ps`` line except the header.

    Returns:
        A list of container names, or ``0`` when there are none (the callers
        in this script compare the result against 0).
    """
    # "grep -v" had lost its space ("grep-v"), so the header filter never ran.
    output = commands.getoutput(
        """sudo /usr/bin/docker ps|grep -v "CONTAINER ID"|awk '{print $NF}'|tr '\n' ' '""")
    names = output.split()
    if names:
        return names
    return 0

 

def print_container_discovery(names):
    """Print the Zabbix low-level-discovery JSON for a list of container names.

    One entry per line in the form ``{"{#CONTAINERNAME}":"<name>"}``; the
    line layout is kept unchanged because the service-discovery script
    extracts the names from this output with grep/awk.
    An empty list prints a valid document with an empty ``data`` array.
    """
    entries = ['\t\t{"{#CONTAINERNAME}":"%s"}' % con for con in names]
    print('{')
    print('\t"data":[')
    if entries:
        # Joining puts the comma after every entry except the last one.
        print(',\n'.join(entries))
    print('\t]')
    print('}')


if __name__ == "__main__":
    if len(sys.argv) == 2 and sys.argv[1] == 'docker':
        infos = docker_s()
        # docker_s() returns 0 when nothing is running; still emit valid JSON
        # instead of printing nothing at all.
        if infos == 0:
            infos = []
        print_container_discovery(infos)

腳本3,用來發現容器的服務(tomcat nginx),之前腳本2和腳本3是一個腳本,發現容器時出現好多問題。

# cat /opt/zabbix/script/discovery_docker_service.py

#!/usr/bin/env python

 

# Felix Shang

#QQ: 279379936

import commands

import sys

 

def docker_s():
    """Return the container names listed in the container discovery file.

    Reads /opt/zabbix/script/docker_cons.txt and takes the fourth
    double-quote-separated field of every line containing ``CONTAINERNAME``.

    Returns:
        A list of container names, or ``0`` when none are found (the callers
        compare the result against 0).
    """
    # The grep and tr arguments had lost their separating spaces
    # ("grep'CONTAINERNAME'", "tr '\n''\t'"), which broke the pipeline.
    output = commands.getoutput(
        """cat /opt/zabbix/script/docker_cons.txt|grep 'CONTAINERNAME'|grep -v grep|awk -F'"' '{print $4}'|tr '\n' '\t'""")
    names = output.split()
    if names:
        return names
    return 0

def tomcat_s():
    """Map every discovered container to the list of its Tomcat connector ports.

    For each container from ``docker_s()``:
      1. check whether a java/tomcat process shows up in ``ps -ef``;
      2. derive directories from the process command line (the text between
         the first '=' and the word 'logging' -- presumably the directory of
         the ``-Djava.util.logging.config.file`` argument; confirm);
      3. read the ``port=`` value of the Connector lines (excluding the
         shutdown and AJP ones) from ``<dir>server.xml``.

    Returns:
        dict of container name -> list of port strings (empty list when no
        Tomcat is running in that container).

    Exits with status 2 when ``docker_s()`` reports no containers.
    """
    containers = docker_s()
    if containers == 0:
        sys.exit(2)

    ports_by_container = {}
    for con in containers:
        probe = commands.getoutput(
            "sudo /usr/bin/docker exec %s ps -ef|grep java|grep tomcat|grep -v grep>/dev/null;echo $?"
            % con)
        ports = []
        # Only the last line is the exit status; docker may print error text
        # before it, which used to crash int().
        if probe.strip().split('\n')[-1].strip() == '0':
            config_dirs = commands.getoutput(
                "sudo /usr/bin/docker exec %s ps -ef | grep tomcat |grep -v grep | awk -F= '{print $2}' | awk -F'logging' '{print $1}'"
                % con).split()
            for config_dir in config_dirs:
                server_xml = config_dir + 'server.xml'
                found = commands.getoutput(
                    """sudo /usr/bin/docker exec %s grep "port=" %s|grep -v "shutdown"|grep -v "AJP"|grep "Connector"|awk -F= '{print $2}'|awk '{print $1}'"""
                    % (con, server_xml))
                # One token per Connector line. Each is unquoted separately
                # (several connectors used to collapse into one broken value)
                # and anything non-numeric, such as an error message or an
                # empty result, is dropped.
                for token in found.split():
                    port = token.strip('"')
                    if port.isdigit():
                        ports.append(port)
        ports_by_container[con] = ports
    return ports_by_container

def nginx_s():
    """Map every discovered container to the list of ports nginx listens on.

    For each container from ``docker_s()`` the function checks ``ps -ef`` for
    an nginx process and, if one exists, takes the port part of the local
    address of every nginx line in ``netstat -ntpul``. Lines containing
    40080 are excluded, as in the original script.

    Returns:
        dict of container name -> list of port strings (empty list when no
        nginx is running in that container).

    Exits with status 2 when ``docker_s()`` reports no containers.
    """
    containers = docker_s()
    if containers == 0:
        sys.exit(2)

    ports_by_container = {}
    for con in containers:
        probe = commands.getoutput(
            "sudo /usr/bin/docker exec %s ps -ef|grep nginx|grep -v grep>/dev/null;echo $?"
            % con)
        ports = []
        # Only the last line is the exit status; docker may print error text
        # before it, which used to crash int().
        if probe.strip().split('\n')[-1].strip() == '0':
            listed = commands.getoutput(
                "sudo /usr/bin/docker exec %s netstat -ntpul|grep nginx|grep -v 40080|awk '{print $4}'|awk -F: '{print $NF}'|tr '\n' ' '"
                % con)
            for port in listed.split():
                # Keep numeric values only and report each port once: the
                # same port typically appears for both the IPv4 and the IPv6
                # listener.
                if port.isdigit() and port not in ports:
                    ports.append(port)
        ports_by_container[con] = ports
    return ports_by_container

 

def print_port_discovery(infos, port_macro):
    """Print the Zabbix low-level-discovery JSON for container service ports.

    Args:
        infos: dict of container name -> list of ports, as returned by
            ``tomcat_s()`` / ``nginx_s()``; containers with an empty list
            contribute no entry.
        port_macro: LLD macro that carries the port, e.g.
            ``{#CON_TOMCAT_PORT}``.
    """
    entries = []
    for con_name in infos:
        for port in infos[con_name]:
            entries.append(
                '\t\t{"{#CONTAINERNAME}":"%s","%s":"%s"}'
                % (con_name, port_macro, port))
    print('{')
    print('\t"data":[')
    if entries:
        # Joining puts the comma after every entry except the last one.
        print(',\n'.join(entries))
    print('\t]')
    print('}')


if __name__ == "__main__":
    # Usage: discovery_docker_service.py tomcat|nginx
    if len(sys.argv) == 2 and sys.argv[1] == 'tomcat':
        print_port_discovery(tomcat_s(), '{#CON_TOMCAT_PORT}')
    elif len(sys.argv) == 2 and sys.argv[1] == 'nginx':
        print_port_discovery(nginx_s(), '{#CON_NGINX_PORT}')

   #else:

#    help_s()

 

#vim /etc/sudoers   //zabbix_agent是通過zabbix用戶執行,通過sudo提權讓zabbix用戶對腳本有執行權限。

zabbix    ALL=(root) NOPASSWD:/usr/bin/docker,/sbin/fdisk,/usr/sbin/dmidecode,/usr/bin/nc

 

Zabbix服務端的配置:

導入模板:Template docker, 宿主機關聯此模板即可。

 

 

報錯:

Server獲取值報錯:[ZBX_NOTSUPPORTED][Timeout while executing a shell script.]

# vim zabbix_agentd.conf

# 設置超時時間

Timeout=30


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章