先描述一下需求:我要對公網IP進行監控,檢查響應情況,大概實現起來就是循環執行curl IP/xxx.html,然後查看http code和響應時間。現在我想把它做成exporter,接到prometheus上,能夠在響應超時或者出現502的時候發出告警,順便在grafana出個圖
prometheus_client文檔地址(完全看不懂orz):https://github.com/prometheus/client_python
import prometheus_client
from prometheus_client import Gauge,start_http_server,Counter
import pycurl
import time
import threading
from io import BytesIO
# Prometheus Counter metrics, built with the Counter type from client_python.
# In the exporter output each of these is exposed with a `_total` sample
# suffix plus a companion `_created` series.

# Probes per URL, grouped by HTTP status class ('1xx' .. '5xx').
url_http_code = Counter(
    "url_http_code",
    "request http_code of the host",
    labelnames=['code', 'url'],
)

# Probes per URL, grouped by response-time bucket ('1', '2', '3', '+Inf').
url_http_request_time = Counter(
    "url_http_request_time",
    "request http_request_time of the host",
    labelnames=['le', 'url'],
)

# Total number of probes per URL.
http_request_total = Counter(
    "http_request_total",
    "request request total of the host",
    labelnames=['url'],
)
def test_website(url):
    """Fetch ``url`` once with pycurl and report status code and total time.

    The response body is written to an in-memory buffer and discarded; only
    the transfer metadata is used.

    Args:
        url: Address to request, passed to pycurl unchanged.

    Returns:
        A ``(http_code, http_total_time)`` tuple. When the transfer raises
        ``pycurl.error`` (e.g. the 3-second connect/total timeout is hit),
        the sentinel pair ``(500, 999)`` is returned instead, so the failure
        is counted as a 5xx in the slowest time bucket by ``count_metric``.
    """
    buffer_curl = BytesIO()
    c = pycurl.Curl()
    try:
        c.setopt(c.URL, url)
        c.setopt(c.WRITEDATA, buffer_curl)
        # Both the connect phase and the whole transfer are capped at 3.
        c.setopt(c.CONNECTTIMEOUT, 3)
        c.setopt(c.TIMEOUT, 3)
        try:
            c.perform()
        except pycurl.error:
            # Transfer failed: report sentinel values rather than raising.
            http_code = 500
            http_total_time = 999
        else:
            http_code = c.getinfo(pycurl.HTTP_CODE)
            http_total_time = c.getinfo(pycurl.TOTAL_TIME)
    finally:
        # Release the curl handle explicitly instead of waiting for garbage
        # collection; this function runs once per URL on every interval.
        c.close()
    return http_code, http_total_time
def count_metric(url):
    """Probe ``url`` once and record the outcome in the exporter counters.

    Increments exactly one status-class series, exactly one response-time
    series, and the per-URL request total.

    Args:
        url: Address to probe; also used as the ``url`` label value.
    """
    status, elapsed = test_website(url)

    # Status class: 1xx-4xx are matched by range; every other value
    # (including the failure sentinel) is counted as '5xx'.
    for floor, code_class in ((100, '1xx'), (200, '2xx'), (300, '3xx'), (400, '4xx')):
        if floor <= status < floor + 100:
            break
    else:
        code_class = '5xx'
    url_http_code.labels(code_class, url).inc()

    # Time bucket: the first bound the elapsed time is strictly below.
    # These buckets are NOT cumulative - each probe lands in one bucket only.
    for limit in (1, 2, 3):
        if elapsed < limit:
            bucket = str(limit)
            break
    else:
        bucket = '+Inf'
    url_http_request_time.labels(bucket, url).inc()

    http_request_total.labels(url).inc()
def count_threads(url):
    """Start a probe of ``url`` every 5 seconds, forever.

    Each probe runs in its own daemon thread, so the 5-second sleep starts
    as soon as the probe is launched rather than after the request finishes.
    This function never returns.

    Args:
        url: Address handed to ``count_metric`` on every iteration.
    """
    while True:
        # daemon=True replaces the deprecated Thread.setDaemon(True).
        t = threading.Thread(target=count_metric, args=(url,), daemon=True)
        t.start()
        time.sleep(5)
# Start one scheduler thread per monitored host.
if __name__ == '__main__':
    # Serve the metrics over HTTP on port 9091.
    start_http_server(9091)
    server_list = [
        'www.baidu.com',
        'www.qq.com',
        'blog.csdn.net',
        'github.com',
        'google.com',
    ]
    # daemon=True replaces the deprecated Thread.setDaemon(True).
    threads = [
        threading.Thread(target=count_threads, args=(url,), daemon=True)
        for url in server_list
    ]
    # Start every scheduler before blocking: count_threads never returns,
    # so a join() issued inside this loop would stop after the first host.
    for thread in threads:
        thread.start()
    # Block the main thread so the process (and its daemon threads) stays up.
    for thread in threads:
        thread.join()
這裏我用獨立線程控制pycurl,每5秒執行一次
訪問9091端口,顯示出來下面數據。指標名後面的_total是prometheus_client按照OpenMetrics的命名約定自動給Counter類型加上的後綴,同時每個Counter還會多出一個記錄創建時間的_created指標
# HELP url_http_code_total request http_code of the host
# TYPE url_http_code_total counter
url_http_code_total{code="3xx",url="blog.csdn.net"} 563.0
url_http_code_total{code="2xx",url="www.baidu.com"} 562.0
url_http_code_total{code="3xx",url="www.qq.com"} 563.0
url_http_code_total{code="3xx",url="github.com"} 555.0
url_http_code_total{code="5xx",url="google.com"} 562.0
url_http_code_total{code="5xx",url="github.com"} 8.0
url_http_code_total{code="5xx",url="www.baidu.com"} 1.0
# TYPE url_http_code_created gauge
url_http_code_created{code="3xx",url="blog.csdn.net"} 1.5511035889801528e+09
url_http_code_created{code="2xx",url="www.baidu.com"} 1.5511035889983172e+09
url_http_code_created{code="3xx",url="www.qq.com"} 1.551103589051125e+09
url_http_code_created{code="3xx",url="github.com"} 1.5511035896066923e+09
url_http_code_created{code="5xx",url="google.com"} 1.5511035919719362e+09
url_http_code_created{code="5xx",url="github.com"} 1.5511040173621314e+09
url_http_code_created{code="5xx",url="www.baidu.com"} 1.5511053585579858e+09
# HELP url_http_request_time_total request http_request_time of the host
# TYPE url_http_request_time_total counter
url_http_request_time_total{le="1",url="blog.csdn.net"} 563.0
url_http_request_time_total{le="1",url="www.baidu.com"} 547.0
url_http_request_time_total{le="1",url="www.qq.com"} 563.0
url_http_request_time_total{le="1",url="github.com"} 416.0
url_http_request_time_total{le="+Inf",url="google.com"} 562.0
url_http_request_time_total{le="2",url="github.com"} 113.0
url_http_request_time_total{le="3",url="github.com"} 26.0
url_http_request_time_total{le="+Inf",url="github.com"} 8.0
url_http_request_time_total{le="2",url="www.baidu.com"} 15.0
url_http_request_time_total{le="+Inf",url="www.baidu.com"} 1.0
# TYPE url_http_request_time_created gauge
url_http_request_time_created{le="1",url="blog.csdn.net"} 1.5511035889801846e+09
url_http_request_time_created{le="1",url="www.baidu.com"} 1.5511035889983401e+09
url_http_request_time_created{le="1",url="www.qq.com"} 1.5511035890511582e+09
url_http_request_time_created{le="1",url="github.com"} 1.5511035896067169e+09
url_http_request_time_created{le="+Inf",url="google.com"} 1.5511035919719641e+09
url_http_request_time_created{le="2",url="github.com"} 1.551103605623951e+09
url_http_request_time_created{le="3",url="github.com"} 1.5511039065420728e+09
url_http_request_time_created{le="+Inf",url="github.com"} 1.551104017362157e+09
url_http_request_time_created{le="2",url="www.baidu.com"} 1.551104911398178e+09
url_http_request_time_created{le="+Inf",url="www.baidu.com"} 1.5511053585580106e+09
# HELP http_request_total request request total of the host
# TYPE http_request_total counter
http_request_total{url="blog.csdn.net"} 563.0
http_request_total{url="www.baidu.com"} 563.0
http_request_total{url="www.qq.com"} 563.0
http_request_total{url="github.com"} 563.0
http_request_total{url="google.com"} 562.0
# TYPE http_request_created gauge
http_request_created{url="blog.csdn.net"} 1.551103588980202e+09
http_request_created{url="www.baidu.com"} 1.5511035889983532e+09
http_request_created{url="www.qq.com"} 1.5511035890511742e+09
http_request_created{url="github.com"} 1.5511035896067343e+09
http_request_created{url="google.com"} 1.5511035919719923e+09
接入到prometheus後,grafana畫圖
這個是http code的每分鐘增長率,如果出現5xx,就說明有問題了
這個是顯示期望時間的比例,比如只顯示小於1秒,佔總次數的比例
delta(url_http_request_time_total{le='1'}[1m]) / on(url) group_left delta(http_request_total[1m])