python開發prometheus exporter

先描述一下需求,我要對公網IP進行監控,檢查響應情況,大概實現起來就是循環執行curl IP/xxx.html ,然後查看http code和響應時間。現在我想把它弄成exporter,接到prometheus上,能夠在響應時間超時,或者502時候發出告警,順便grafana出個圖
prometheus_client文檔地址(完全看不懂orz):https://github.com/prometheus/client_python

import prometheus_client
from prometheus_client import Gauge,start_http_server,Counter
import pycurl
import time
import threading
from io import BytesIO
# Prometheus Counter metrics (from prometheus_client) exposed by this exporter.
# Probes per URL, labelled by HTTP status-code class ('1xx' .. '5xx').
url_http_code = Counter(
    "url_http_code",
    "request http_code of the host",
    labelnames=("code", "url"),
)
# Probes per URL, labelled by response-time bucket ('1', '2', '3', '+Inf').
url_http_request_time = Counter(
    "url_http_request_time",
    "request http_request_time of the host",
    labelnames=("le", "url"),
)
# Total number of probes issued per URL.
http_request_total = Counter(
    "http_request_total",
    "request request total of the host",
    labelnames=("url",),
)
def test_website(url):
    """Fetch ``url`` once with pycurl and report its status and latency.

    Both the connect timeout and the overall transfer timeout are set to 3.

    Args:
        url: Address to request; passed to pycurl unchanged.

    Returns:
        A ``(http_code, http_total_time)`` tuple. On success these are the
        values pycurl reports for ``HTTP_CODE`` and ``TOTAL_TIME``. If the
        transfer raises ``pycurl.error`` (timeout, DNS failure, connection
        refused, ...) the sentinel pair ``(500, 999)`` is returned so the
        caller counts the probe as a server-side failure in the slowest
        bucket.
    """
    # The response body is not needed; it is written to a throwaway buffer.
    buffer_curl = BytesIO()
    c = pycurl.Curl()
    try:
        c.setopt(c.URL, url)
        c.setopt(c.WRITEDATA, buffer_curl)
        c.setopt(c.CONNECTTIMEOUT, 3)
        c.setopt(c.TIMEOUT, 3)
        try:
            c.perform()
        except pycurl.error:
            http_code = 500
            http_total_time = 999
        else:
            # getinfo() must be called before the handle is closed.
            http_code = c.getinfo(pycurl.HTTP_CODE)
            http_total_time = c.getinfo(pycurl.TOTAL_TIME)
    finally:
        # Release the handle on every path; this function runs once per
        # probe, so an unclosed handle would leak on each call.
        c.close()
    return http_code, http_total_time
def count_metric(url):
    """Probe ``url`` once and record the result in the exporter's counters.

    Increments three counters for the URL: the status-code class counter,
    the response-time bucket counter, and the total-probes counter.
    """
    status, elapsed = test_website(url)

    # Any status outside 100-499 (including the probe's failure sentinel)
    # is counted as '5xx'.
    code_label = '5xx'
    for lower, label in ((100, '1xx'), (200, '2xx'), (300, '3xx'), (400, '4xx')):
        if lower <= status < lower + 100:
            code_label = label
            break
    url_http_code.labels(code_label, url).inc()

    # First upper bound strictly greater than the elapsed time wins;
    # anything at or above 3 lands in '+Inf'.
    time_label = '+Inf'
    for bound in (1, 2, 3):
        if elapsed < bound:
            time_label = str(bound)
            break
    url_http_request_time.labels(time_label, url).inc()

    http_request_total.labels(url).inc()
def count_threads(url, interval=5):
    """Probe ``url`` forever, starting one probe every ``interval`` seconds.

    Each probe runs in its own daemon thread so that a slow request does not
    delay the start of the next one. This function never returns.

    Args:
        url: Address handed to ``count_metric`` for every probe.
        interval: Seconds to sleep between starting probes (default 5).
    """
    while True:
        # daemon=True replaces the deprecated Thread.setDaemon(True).
        worker = threading.Thread(target=count_metric, args=(url,), daemon=True)
        worker.start()
        time.sleep(interval)
# Start the metrics endpoint, then run one scheduler thread per monitored host.
if __name__ == '__main__':
    # Serve the metrics over HTTP on port 9091.
    start_http_server(9091)
    server_list = [
            'www.baidu.com',
            'www.qq.com',
            'blog.csdn.net',
            'github.com',
            'google.com'
            ]
    # daemon=True replaces the deprecated Thread.setDaemon(True).
    threads = [
        threading.Thread(target=count_threads, args=(url,), daemon=True)
        for url in server_list
    ]
    for thread in threads:
        thread.start()
    # The schedulers loop forever, so joining them keeps the main thread (and
    # with it the process) alive. Join every thread rather than relying on
    # the loop variable left over from the start loop, which only covered the
    # last thread and was undefined when server_list was empty.
    for thread in threads:
        thread.join()

這裏我用獨立線程控制pycurl,每5秒執行一次
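訪問9091端口,顯示出來下面數據。一開始不知道爲啥全自動加上了_total,其實這是prometheus_client的行爲:Counter類型的指標導出時會統一帶上_total後綴(名字本身已經以_total結尾的不會重複添加),並且額外導出一個_created指標記錄創建時間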

# HELP url_http_code_total request http_code of the host
# TYPE url_http_code_total counter
url_http_code_total{code="3xx",url="blog.csdn.net"} 563.0
url_http_code_total{code="2xx",url="www.baidu.com"} 562.0
url_http_code_total{code="3xx",url="www.qq.com"} 563.0
url_http_code_total{code="3xx",url="github.com"} 555.0
url_http_code_total{code="5xx",url="google.com"} 562.0
url_http_code_total{code="5xx",url="github.com"} 8.0
url_http_code_total{code="5xx",url="www.baidu.com"} 1.0
# TYPE url_http_code_created gauge
url_http_code_created{code="3xx",url="blog.csdn.net"} 1.5511035889801528e+09
url_http_code_created{code="2xx",url="www.baidu.com"} 1.5511035889983172e+09
url_http_code_created{code="3xx",url="www.qq.com"} 1.551103589051125e+09
url_http_code_created{code="3xx",url="github.com"} 1.5511035896066923e+09
url_http_code_created{code="5xx",url="google.com"} 1.5511035919719362e+09
url_http_code_created{code="5xx",url="github.com"} 1.5511040173621314e+09
url_http_code_created{code="5xx",url="www.baidu.com"} 1.5511053585579858e+09
# HELP url_http_request_time_total request http_request_time of the host
# TYPE url_http_request_time_total counter
url_http_request_time_total{le="1",url="blog.csdn.net"} 563.0
url_http_request_time_total{le="1",url="www.baidu.com"} 547.0
url_http_request_time_total{le="1",url="www.qq.com"} 563.0
url_http_request_time_total{le="1",url="github.com"} 416.0
url_http_request_time_total{le="+Inf",url="google.com"} 562.0
url_http_request_time_total{le="2",url="github.com"} 113.0
url_http_request_time_total{le="3",url="github.com"} 26.0
url_http_request_time_total{le="+Inf",url="github.com"} 8.0
url_http_request_time_total{le="2",url="www.baidu.com"} 15.0
url_http_request_time_total{le="+Inf",url="www.baidu.com"} 1.0
# TYPE url_http_request_time_created gauge
url_http_request_time_created{le="1",url="blog.csdn.net"} 1.5511035889801846e+09
url_http_request_time_created{le="1",url="www.baidu.com"} 1.5511035889983401e+09
url_http_request_time_created{le="1",url="www.qq.com"} 1.5511035890511582e+09
url_http_request_time_created{le="1",url="github.com"} 1.5511035896067169e+09
url_http_request_time_created{le="+Inf",url="google.com"} 1.5511035919719641e+09
url_http_request_time_created{le="2",url="github.com"} 1.551103605623951e+09
url_http_request_time_created{le="3",url="github.com"} 1.5511039065420728e+09
url_http_request_time_created{le="+Inf",url="github.com"} 1.551104017362157e+09
url_http_request_time_created{le="2",url="www.baidu.com"} 1.551104911398178e+09
url_http_request_time_created{le="+Inf",url="www.baidu.com"} 1.5511053585580106e+09
# HELP http_request_total request request total of the host
# TYPE http_request_total counter
http_request_total{url="blog.csdn.net"} 563.0
http_request_total{url="www.baidu.com"} 563.0
http_request_total{url="www.qq.com"} 563.0
http_request_total{url="github.com"} 563.0
http_request_total{url="google.com"} 562.0
# TYPE http_request_created gauge
http_request_created{url="blog.csdn.net"} 1.551103588980202e+09
http_request_created{url="www.baidu.com"} 1.5511035889983532e+09
http_request_created{url="www.qq.com"} 1.5511035890511742e+09
http_request_created{url="github.com"} 1.5511035896067343e+09
http_request_created{url="google.com"} 1.5511035919719923e+09

接入到prometheus後,grafana畫圖
這個是http code的每分鐘增長率,如果出現5xx,就說明有問題了
在這裏插入圖片描述

這個是顯示響應時間達到期望的比例,比如響應時間小於1秒的請求佔總請求次數的比例

increase(url_http_request_time_total{le='1'}[1m]) / on(url) group_left increase(http_request_total[1m])

在這裏插入圖片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章