Dr-Elephant收集到YARN JOB信息後,會將其存入對應的MySQL表(yarn_app_result、yarn_app_heuristic_result、yarn_app_heuristic_result_details):
- yarn_app_result (job是否有異常)
- yarn_app_heuristic_result (job的異常指標)
- yarn_app_heuristic_result_details (job的異常指標對應的詳細說明)
通過MySQL表裏收集到的數據,可以對異常JOB進行監控並報警。
最後報警信息展示如下:
Code Demo
# coding: utf-8
import json
import sys
import time

import requests
import torndb
# Python 2 only: `reload` is a builtin there, and `sys.setdefaultencoding` is
# only reachable again after `sys` has been reloaded. This switches the
# process-wide implicit str<->unicode codec to UTF-8, presumably so that the
# non-ASCII text formatted with `str()` / `.format()` further down does not
# raise UnicodeDecodeError/UnicodeEncodeError.
reload(sys)
sys.setdefaultencoding('utf-8')
class DingDingUtil(object):
    """Minimal client that posts text messages to a DingTalk robot webhook."""

    # Webhook endpoint; the robot is selected by the access_token parameter.
    dingtalk_url = "https://oapi.dingtalk.com/robot/send?access_token=xxxx"

    # Seconds to wait for the webhook before giving up. Without a timeout
    # `requests` may block indefinitely on an unresponsive server.
    request_timeout = 10

    @staticmethod
    def _build_payload(msg):
        """Return the JSON request body for an "@all" text message.

        The body is produced with ``json.dumps`` rather than string
        interpolation so that quotes, backslashes and newlines inside *msg*
        are escaped and the result is always valid JSON.

        :param msg: message text (any object is rendered with ``%s``)
        :return: JSON document as an ASCII-only string
        """
        # Keep the original "%s" coercion so non-string arguments still end up
        # as a JSON string in "content".
        content = "%s" % (msg,)
        payload = {
            "msgtype": "text",
            "text": {"content": content},
            "at": {"isAtAll": True},
        }
        return json.dumps(payload)

    @staticmethod
    def send(msg):
        """POST *msg* to the configured webhook as a text message.

        :param msg: message text to deliver
        :return: None; the HTTP response is not inspected
        """
        headers = {"Content-Type": "application/json"}
        requests.post(
            DingDingUtil.dingtalk_url,
            data=DingDingUtil._build_payload(msg),
            headers=headers,
            timeout=DingDingUtil.request_timeout,
        )
class DrElephantMonitor(object):
    """Alert on recently finished YARN jobs that Dr. Elephant rated badly.

    Reads three MySQL tables:

    - ``yarn_app_result``: one row per job, with its overall severity
    - ``yarn_app_heuristic_result``: per-job heuristic findings
    - ``yarn_app_heuristic_result_details``: name/value details per finding

    and sends one DingTalk message per offending job.
    """

    # Display label for each numeric severity value.
    severity_name_dict = {
        0: "None",
        1: "Low",
        2: "Moderate",
        3: "Severe",
        4: "Critical",
    }

    # Lowest severity that is reported (3 maps to "Severe" above).
    min_alert_severity = 3
    # How far back from "now" finished jobs are considered, in milliseconds.
    lookback_ms = 10 * 60 * 1000
    # Maximum number of jobs reported per run, highest score first.
    max_jobs_per_run = 5

    # Shared connection; note that it is opened when the class body executes.
    mysql_client = torndb.Connection("host", "db",
                                     user="user", password="pass", time_zone='+8:00')

    @staticmethod
    def query_yarn_app_heuristic_result_details(yarn_app_heuristic_result_id):
        """Return the detail lines of one heuristic finding.

        :param yarn_app_heuristic_result_id: id of the
            ``yarn_app_heuristic_result`` row
        :return: one ``name value`` line per detail row (name left-aligned in
            40 columns), concatenated; empty string when there are no rows
        """
        # The id is passed as a query parameter so the driver does the quoting.
        sql = """
            select
                name, value
            from yarn_app_heuristic_result_details
            where yarn_app_heuristic_result_id = %s
        """
        detail_rows = DrElephantMonitor.mysql_client.query(
            sql, yarn_app_heuristic_result_id)
        return "".join(
            '{:<40}{:<}\n'.format(row['name'], row['value'])
            for row in detail_rows
        )

    @staticmethod
    def query_yarn_app_heuristic_result(yarn_app_result_id):
        """Return the formatted findings at or above the alert severity for a job.

        :param yarn_app_result_id: id of the ``yarn_app_result`` row
        :return: text with one section per finding (heuristic name, severity
            label, score, then its detail lines); empty string if none match
        """
        sql = """
            select id, heuristic_name, severity, score
            from yarn_app_heuristic_result
            where yarn_app_result_id = %s
              and severity >= %s
        """
        heuristic_rows = DrElephantMonitor.mysql_client.query(
            sql, yarn_app_result_id, DrElephantMonitor.min_alert_severity)

        sections = []
        for row in heuristic_rows:
            severity = row['severity']
            # Fall back to the raw number instead of raising KeyError if the
            # table ever holds a severity missing from the label map.
            severity_name = DrElephantMonitor.severity_name_dict.get(severity, severity)
            details = DrElephantMonitor.query_yarn_app_heuristic_result_details(row['id'])
            sections.append(
                '\n異常指標: {:<} 嚴重程度: {:<} 待優化指數: {:<}\n{:<}\n'.format(
                    row['heuristic_name'], severity_name, row['score'], details))
        return "".join(sections)

    @staticmethod
    def query_yarn_app_result():
        """Find recently finished jobs that need tuning and alert on each.

        Selects jobs whose ``finish_time`` lies within the last
        ``lookback_ms`` milliseconds and whose severity is at least
        ``min_alert_severity``, keeps the ``max_jobs_per_run`` highest scores,
        and sends one DingTalk message per job.

        :return: None
        """
        now_timestamp_ms = int(time.time() * 1000)
        begin_timestamp_ms = now_timestamp_ms - DrElephantMonitor.lookback_ms
        sql = """
            select
                id, job_name, job_def_id
            from yarn_app_result
            where finish_time <= %s and finish_time >= %s
              and severity >= %s
            order by score desc limit %s
        """
        job_rows = DrElephantMonitor.mysql_client.query(
            sql,
            now_timestamp_ms,
            begin_timestamp_ms,
            DrElephantMonitor.min_alert_severity,
            DrElephantMonitor.max_jobs_per_run,
        )
        for row in job_rows:
            job_name = str(row['job_name'])
            # Guard against a NULL/empty column so the ":" test cannot raise.
            job_def_id = row['job_def_id'] or ""
            _prefix, separator, remainder = job_def_id.partition(":")
            if separator:
                # Keep only the text after the first ":" and cap it at 200
                # characters to keep the alert short.
                job_sql = str(remainder.strip()[:200])
            else:
                job_sql = str(job_def_id)
            job_heuristic_result = DrElephantMonitor.query_yarn_app_heuristic_result(row['id'])
            DingDingUtil.send("任務名: %s\nSQL: %s\n%s" % (str(job_name), job_sql, str(job_heuristic_result)))
def _run_monitor():
    """Run a single scan-and-alert pass of the Dr. Elephant monitor."""
    DrElephantMonitor.query_yarn_app_result()


# Only run the monitor when this file is executed as a script.
if __name__ == "__main__":
    _run_monitor()