通過 Dr-Elephant 監控異常HIVE任務並報警

Dr-Elephant 收集到 YARN JOB 信息後,會將其存入對應的 MySQL 表(yarn_app_result、yarn_app_heuristic_result、yarn_app_heuristic_result_details):

  • yarn_app_result (job是否有異常)
  • yarn_app_heuristic_result (job的異常指標)
  • yarn_app_heuristic_result_details (job的異常指標對應的詳細說明)

通過查詢這些 MySQL 表裏收集到的數據,監控異常任務並發出報警。

最後報警信息展示如下:

(此處原為報警信息的截圖,圖片未能顯示)

Code Demo

# coding: utf-8
import torndb
import time
import sys
import requests
# Python 2 only: reload(sys) makes sys.setdefaultencoding available again so
# the process-wide default str<->unicode encoding can be switched to UTF-8.
# The rest of this script mixes byte-string literals containing Chinese text
# with values read from MySQL and depends on this setting. Neither call
# exists on Python 3.
reload(sys)
sys.setdefaultencoding('utf-8')


class DingDingUtil(object):
    """Minimal sender for a DingTalk robot webhook (text messages only)."""

    # Webhook endpoint; the access_token value here is a placeholder.
    dingtalk_url = "https://oapi.dingtalk.com/robot/send?access_token=xxxx"
    # Seconds to wait for the webhook before giving up, so that a stalled
    # endpoint cannot block the monitor indefinitely.
    request_timeout = 10

    @staticmethod
    def build_payload(msg):
        """Serialize *msg* into the JSON request body of a text message.

        The body is produced with ``json.dumps`` rather than string
        interpolation, so quotes, backslashes and newlines inside *msg* are
        escaped and the result is always valid JSON.

        :param msg: message text; non-string values are converted with ``%s``
        :return: JSON document (ASCII-only string) that @-mentions everyone
        """
        import json  # local import keeps this class self-contained

        payload = {
            "msgtype": "text",
            # "%s" keeps the original coercion of non-string arguments.
            "text": {"content": "%s" % (msg,)},
            "at": {"isAtAll": True},
        }
        # Default ensure_ascii=True emits \uXXXX escapes, so the body is safe
        # to send regardless of how the HTTP layer encodes strings.
        return json.dumps(payload)

    @staticmethod
    def send(msg):
        """POST *msg* to the DingTalk webhook as a text message.

        :param msg: message text to deliver
        :return: None
        """
        headers = {"Content-Type": "application/json"}
        requests.post(DingDingUtil.dingtalk_url,
                      data=DingDingUtil.build_payload(msg),
                      headers=headers,
                      timeout=DingDingUtil.request_timeout)


class DrElephantMonitor(object):
    """Alert on recently finished YARN jobs flagged in the Dr. Elephant tables.

    Reads yarn_app_result, yarn_app_heuristic_result and
    yarn_app_heuristic_result_details and sends one DingTalk message per
    flagged job through ``DingDingUtil.send``.
    """

    # Display names for the numeric severity values stored in the tables.
    severity_name_dict = {
        0: "None",
        1: "Low",
        2: "Moderate",
        3: "Severe",
        4: "Critical"
    }
    # Lowest severity that is reported, for both jobs and heuristics.
    min_severity = 3
    # How far back (in milliseconds) from "now" finished jobs are considered.
    lookback_ms = 10 * 60 * 1000
    # Maximum number of jobs reported per run (highest score first).
    max_jobs = 5
    # NOTE: the connection object is created when this class body executes
    # (i.e. at import time) and is shared by all methods below.
    mysql_client = torndb.Connection("host", "db",
                                     user="user", password="pass", time_zone='+8:00')

    @staticmethod
    def query_yarn_app_heuristic_result_details(yarn_app_heuristic_result_id):
        """Return the detail rows of one heuristic result as aligned text.

        :param yarn_app_heuristic_result_id: id of a yarn_app_heuristic_result row
        :return: one line per detail row, the name left-aligned in a
            40-column field followed by the value; "" when there are no rows
        """
        # Values are passed as query parameters so the driver does the quoting.
        sql = """
          select
            name, value
          from yarn_app_heuristic_result_details
          where yarn_app_heuristic_result_id = %s
        """
        rows = DrElephantMonitor.mysql_client.query(
            sql, yarn_app_heuristic_result_id)
        return "".join(
            '{:<40}{:<}\n'.format(row['name'], row['value']) for row in rows)

    @staticmethod
    def query_yarn_app_heuristic_result(yarn_app_result_id):
        """Describe the heuristics of one job that reach ``min_severity``.

        :param yarn_app_result_id: id of a yarn_app_result row
        :return: text with one section per matching heuristic (name,
            severity name, score, then its detail lines); "" when none match
        """
        sql = """
          select id, heuristic_name, severity, score
          from yarn_app_heuristic_result
          where yarn_app_result_id = %s
            and severity >= %s
        """
        rows = DrElephantMonitor.mysql_client.query(
            sql, yarn_app_result_id, DrElephantMonitor.min_severity)

        sections = []
        for row in rows:
            severity = row['severity']
            # Fall back to the raw value instead of raising on an unmapped severity.
            severity_name = DrElephantMonitor.severity_name_dict.get(
                severity, str(severity))
            details = DrElephantMonitor.query_yarn_app_heuristic_result_details(
                row['id'])
            sections.append(
                '\n異常指標: {:<}  嚴重程度: {:<}  待優化指數: {:<}\n{:<}\n'.format(
                    row['heuristic_name'], severity_name, row['score'], details))
        return "".join(sections)

    @staticmethod
    def query_yarn_app_result():
        """Find recently finished flagged jobs and send an alert for each.

        Selects up to ``max_jobs`` rows of yarn_app_result whose finish_time
        lies within the last ``lookback_ms`` milliseconds and whose severity
        is at least ``min_severity``, ordered by score descending, and sends
        one DingTalk message per job.

        :return: None
        """
        now_timestamp_ms = int(time.time() * 1000)
        begin_timestamp_ms = now_timestamp_ms - DrElephantMonitor.lookback_ms
        sql = """
            select
              id, job_name, job_def_id
            from yarn_app_result
            where finish_time <= %s and finish_time >= %s
              and severity >= %s
            order by score desc limit %s
        """
        rows = DrElephantMonitor.mysql_client.query(
            sql, now_timestamp_ms, begin_timestamp_ms,
            DrElephantMonitor.min_severity, DrElephantMonitor.max_jobs)

        for row in rows:
            job_name = str(row['job_name'])
            # A NULL job_def_id is treated as empty rather than crashing the run.
            job_def_id = row['job_def_id'] or ""
            # Presumably job_def_id looks like "<prefix>: <sql text>" (TODO
            # confirm): the text after the first colon is used as the SQL and
            # capped at 200 characters; without a colon the value is used as is.
            if ":" in job_def_id:
                job_sql = str(job_def_id.split(":", 1)[1].strip()[:200])
            else:
                job_sql = str(job_def_id)
            job_heuristic_result = DrElephantMonitor.query_yarn_app_heuristic_result(
                row['id'])
            DingDingUtil.send("任務名: %s\nSQL: %s\n%s" % (
                job_name, job_sql, job_heuristic_result))


# Script entry point: run a single monitoring pass when executed directly.
if __name__ == "__main__":
    DrElephantMonitor.query_yarn_app_result()


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章