python腳本:自動檢測rrd文件並羣發報警郵件

腳本背景:

我所在的公司爲運營CDN業務的IDC公司,客戶域名的流量圖經常會出現毛刺,但是服務的域名非常多,每天挨個查看流量圖耗時耗力。因此用python寫了個可以自動檢測rrd文件裏的異常數值並發送報警郵件的腳本。


由於我們的rrd文件是以服務域名命名的,所以先在相應的API上獲取服務域名,然後根據域名掃描rrd文件。我設的是掃描半小時的數值,每10分鐘執行一次,大概有2000來個rrd文件,執行一次6、7秒左右。


代碼如下:


#!/usr/bin/env python
#coding:utf-8
import calendar
import email
import os
import smtplib
import sys
import time
import urllib2
from datetime import datetime
# The original line read `from email.mime.p_w_picpath import MIMEImage`;
# that module does not exist -- MIMEImage lives in email.mime.image.
from email.mime.image import MIMEImage
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

from pyrrd.graph import DEF,CDEF,AREA
from pyrrd.graph import Graph
from pyrrd.graph import ColorAttributes
def graphrrd(files):
    """Render a traffic graph of the last 12 hours for one RRD file.

    The ``RX`` data source is plotted with the legend ``back_flow`` and the
    ``TX`` data source with the legend ``CDN_flow``; both are multiplied by
    0.026 before plotting.  The image is written to the module-level
    ``p_w_picpath_dir`` path.

    Args:
        files: Path of the RRD file to plot.  Its file name without the
            extension is used as the graph title.
    """
    now_utc = calendar.timegm(datetime.utcnow().utctimetuple())

    def1 = DEF(rrdfile=files, vname='back', dsName='RX')
    def2 = DEF(rrdfile=files, vname='CDN', dsName='TX')
    cdef1 = CDEF(vname='back_flow', rpn='%s,0.026,*' % def1.vname)
    cdef2 = CDEF(vname='CDN_flow', rpn='%s,0.026,*' % def2.vname)
    area1 = AREA(defObj=cdef1, color='#002A97FF', legend='back_flow')
    area2 = AREA(defObj=cdef2, color='#00CF00FF', legend='CDN_flow')

    # Dark colour theme for the rendered image.
    ca = ColorAttributes()
    ca.back = '#333333'
    ca.canvas = '#333333'
    ca.shadea = '#000000'
    ca.shadeb = '#111111'
    ca.mgrid = '#CCCCCC'
    ca.axis = '#FFFFFF'
    ca.frame = '#AAAAAA'
    ca.font = '#FFFFFF'
    ca.arrow = '#FFFFFF'

    graphfile = p_w_picpath_dir
    # Take the title from the file name instead of the fixed slice [23:-4],
    # which only worked for one particular directory prefix length.
    title_url = os.path.splitext(os.path.basename(files))[0]

    # 43200 seconds = 12 hours.
    g = Graph(graphfile, start=now_utc - 43200, end=now_utc,
              vertical_label='flow', title=title_url)
    # area2 is added before area1, matching the original drawing order.
    g.data.extend([def1, def2, cdef1, cdef2, area2, area1])
    g.write()
def connect():
    """Open an SMTP session and log in.

    Uses the module-level ``smtpserver``, ``smtpuser`` and ``smtppass``
    settings.

    Returns:
        The logged-in ``smtplib.SMTP`` object.
    """
    session = smtplib.SMTP(smtpserver)
    session.ehlo()
    session.login(smtpuser, smtppass)
    return session
def sendmessage(server,to,subj,content):
    """Send an HTML alert mail with the rendered graph embedded inline.

    The mail is sent from the module-level ``smtpuser`` address.  The image
    at the module-level ``p_w_picpath_dir`` path is attached with the
    Content-ID ``<p_w_picpath1>``.

    Args:
        server: Object providing ``sendmail(from_addr, to_addr, message)``.
        to: Recipient address.
        subj: Subject line.
        content: HTML body of the mail.

    Sending is best effort: a failure in ``sendmail`` is printed and
    swallowed.  Errors while reading the image file are not caught.
    """
    # Imported here so the block does not depend on the Python 2-only
    # ``email.Utils`` alias.
    from email.utils import formatdate

    msg = MIMEMultipart('related')
    msg['Subject'] = subj
    msg['From'] = smtpuser
    msg['To'] = to
    msg['Date'] = formatdate()
    msg.attach(MIMEText(content, "html", "utf-8"))

    # ``with`` guarantees the image file is closed even if reading or MIME
    # construction fails.
    with open(p_w_picpath_dir, 'rb') as fp:
        msgImage = MIMEImage(fp.read())
    msgImage.add_header('Content-ID', '<p_w_picpath1>')
    msg.attach(msgImage)

    try:
        server.sendmail(smtpuser, to, msg.as_string())
    except Exception as ex:
        # Deliberately broad: one failed recipient must not abort the run.
        print('%s: %s' % (type(ex).__name__, ex))
        print('Error - send failed')
def aver(rrd_file,n=6):
    """Load the latest samples of an RRD file and average their columns.

    Runs ``rrdtool fetch ... AVERAGE -s -1d``, keeps the last ``n`` output
    lines, and drops lines containing ``nan`` or ``RX``.  Every remaining
    line longer than 25 characters is stored in the global ``dict_data``:
    the key is the first 10 characters of the line (presumably the epoch
    timestamp) and the value is the whitespace-split text after column 12.

    Args:
        rrd_file: Path of the RRD file.
        n: Number of trailing output lines to inspect.

    Returns:
        ``[avg0, avg1, avg2]`` for the first three columns, or ``[]`` when
        there are too few lines or the second average is below 3500000000.
    """
    global dict_data

    # NOTE: rrd_file is interpolated into a shell command unquoted.
    command = 'rrdtool fetch %s AVERAGE -s -1d | tail -%d | grep -v nan| grep -v RX ' % (rrd_file,n)
    pipe = os.popen(command)
    try:
        data = pipe.readlines()
    finally:
        pipe.close()

    # ``not data`` also covers small n where n/2 is 0, which would otherwise
    # reach the divisions below with len(data) == 0.
    if not data or len(data) < (n/2):
        log("[ERRORS: %s] has not enough record ! please check it!!\n" % rrd_file)
        return []

    for line in data:
        if len(line) > 25:
            dict_data[line[:10]] = line.strip()[12:].split()

    sum1 = 0
    sum2 = 0
    sum3 = 0
    for fields in dict_data.values():
        try:
            sum1 = sum1 + float(fields[0])
            sum2 = sum2 + float(fields[1])
            sum3 = sum3 + float(fields[2])
        except (ValueError, IndexError):
            # Malformed or short row: record it and keep going.
            log('%s %s\n' % (rrd_file,fields))

    count = len(data)
    # NOTE(review): the message says 200M; confirm it matches this threshold.
    if sum2/count < 3500000000:
        log('WARNING: %s was less then 200M\n' % rrd_file)
        return []
    return [sum1/count, sum2/count, sum3/count]
def check(average):
    """Return the keys of ``dict_data`` whose second field is a spike.

    A key is reported when its second field, converted to float, is more
    than 1.6 times ``average``.

    Args:
        average: Reference value to compare each sample against.

    Returns:
        List of matching keys, in ``dict_data`` iteration order.
    """
    return [stamp for stamp in dict_data
            if float(dict_data[stamp][1]) / average > 1.6]
                          
def update(rrd_file,t,aver1,aver2,aver3):
    """Append one anomaly report to the global mail body and the error log.

    Builds an HTML fragment for the sample stored under key ``t`` in the
    global ``dict_data``, appends it to the global ``text``, and writes a
    one-line summary through ``write_error``.

    Args:
        rrd_file: Path of the RRD file containing the anomaly.
        t: Key into ``dict_data``; passed to ``date`` as seconds since the
            epoch to produce a readable time.
        aver1, aver2, aver3: Accepted for interface compatibility; unused.
    """
    global text

    # Take the domain from the file name instead of the fixed slice [23:-4],
    # which only worked for one particular directory prefix length.
    domain = os.path.splitext(os.path.basename(rrd_file))[0]
    sample = dict_data[t]

    pipe = os.popen('date -d "1970-01-01 UTC %s seconds"' % t)
    try:
        errors_time = pipe.readline().strip()
    finally:
        pipe.close()

    content = '<br/><br/>%s 異常信息:<br/>&nbsp;&nbsp;&nbsp; 域名:&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; %s <br/>&nbsp;&nbsp;&nbsp; 時間:&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; %s<br/>&nbsp;&nbsp;&nbsp; 流量值:&nbsp;&nbsp;&nbsp;&nbsp; 回源帶寬: %.2fM , cdn帶寬 : %dM <br/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <br/>rrd 異常信息:<br/>&nbsp;&nbsp;&nbsp; 路徑:&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; %s<br/>&nbsp;&nbsp;&nbsp; UTC 時間:&nbsp;&nbsp;&nbsp; %s<br/>&nbsp;&nbsp;&nbsp; 異常值:&nbsp;&nbsp;&nbsp;&nbsp; [%s], [%s], [%s]<br/><br/><img src="cid:p_w_picpath1">' % (domain,domain,errors_time,float(sample[0])*8/300000000,int(float(sample[1])*8/300000000),rrd_file,t,sample[0],sample[1],sample[2])
    write_error('[ %s ]: at[ %s(%s) ],the value was [%s] [%s] [%s] \n' %(rrd_file,errors_time,t,sample[0],sample[1],sample[2]))
    text = text + content
def log(log_write):
    """Append ``log_write`` to ``rrd_alt1.log`` in the ``rrd_bak`` directory.

    Args:
        log_write: Text to append; no newline is added.
    """
    # ``with`` closes the file even if the write fails.
    with open('%s/rrd_alt1.log' % rrd_bak, 'a') as f:
        f.write(log_write)
def write_error(log_write):
    """Append ``log_write`` to ``rrd_error1.log`` in the ``rrd_bak`` directory.

    Args:
        log_write: Text to append; no newline is added.
    """
    # The original wrote ``f.close`` without parentheses, so the file was
    # never explicitly closed; ``with`` closes it reliably.
    with open('%s/rrd_error1.log' % rrd_bak, 'a') as f:
        f.write(log_write)
                  
def run_script(rrd_file):
    """Check one RRD file and mail an alert if it contains spikes.

    Steps: average the recent samples with ``aver``, find outliers with
    ``check``, add each outlier to the global ``text`` through ``update``,
    render the graph, then mail ``text`` to every address in the global
    ``to_all`` with the global subject ``subj``.

    Args:
        rrd_file: Path of the RRD file to inspect.
    """
    aver_rrd = aver(rrd_file)
    if not aver_rrd:
        return

    wrong_time = check(aver_rrd[1])
    if not wrong_time:
        log('[%s] no errors !\n' % (rrd_file))
        return

    for t in wrong_time:
        update(rrd_file, t, aver_rrd[0], aver_rrd[1], aver_rrd[2])
    graphrrd(rrd_file)

    if text:
        for to in to_all:
            # One connection per recipient is kept on purpose: sendmessage
            # swallows send errors, so a broken session must not be reused.
            server = connect()
            try:
                sendmessage(server, to, subj, text)
            finally:
                # The original never closed the SMTP session.
                try:
                    server.quit()
                except smtplib.SMTPException:
                    # Peer already gone: just drop the socket.
                    server.close()
            log('sendmail to %s\n' % to)
if __name__=='__main__':
    # Configuration, read as module-level globals by the functions above.
    p_w_picpath_time=time.strftime("%d-%H-%M")
    rrd_dir='/data/rrd/db/1/billing'
    rrd_bak='/data/rrd/db/1/billing/bak'
    smtpserver='xxx'
    p_w_picpath_dir='%s/rrdgraph_%s.png' % (rrd_bak,p_w_picpath_time)
    smtpuser='xxx'
    smtppass='yyy'
    to_all=['xxx','yyy']
    subj='check the flow of CDN!!!!'

    # The original wrapped this single pass in ``while True`` with a
    # ``break`` placed outside the loop body, which is a SyntaxError.
    local_time = time.strftime("%m-%d %H:%M:%S")

    # Each line of the response names one RRD file (without ".rrd").
    response = urllib2.urlopen('xxx')
    try:
        url = response.readlines()
    finally:
        response.close()
    url_list = ["%s/%s.rrd" % (rrd_dir, u.strip()) for u in url]

    log("-"*60+"\n")
    log("the script run time at %s \n" % local_time)

    # The original popped from the end of the list, so files are processed
    # in reverse order; keep that order.
    for rrd_file in reversed(url_list):
        # Reset the per-file state shared with aver/check/update.
        text=''
        dict_data={}
        if os.path.exists(rrd_file):
            run_script(rrd_file)

    log("-"*60+"\n")

郵件截圖

173433350.jpg


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章