使用python處理日誌文件

   最近爲了分析程序的運行情況、得出機器的最高性能,需要從日誌文件中統計程序的輸出與效率;爲了讓數據直觀、工作方便,於是使用了 Python。

    日誌文件如下

2020-03-06 00:13:29.134 [Debug]: SnapFaceMongoDao::syncData2Java response:{"status":200}
2020-03-06 00:13:29.134 [Debug]: SnapFaceMongoDao::syncData2Java all use: 19
2020-03-06 00:13:29.134 [Debug]: AlarmWithoutMask camera_id:269 wear_mask:0, face_id:4040e0e85efc11eaa4d3ac1f6b947f3a
2020-03-06 00:13:29.135 [Debug]: SnapFaceMongoDao::syncData2Java response:{"status":200}
2020-03-06 00:13:29.135 [Debug]: SnapFaceMongoDao::syncData2Java all use: 19
2020-03-06 00:13:29.135 [Debug]: AlarmWithoutMask camera_id:269 wear_mask:0, face_id:4040ddf05efc11ea9ed0ac1f6b947f3a
2020-03-06 00:13:29.137 [Debug]: PersonAttributeMongoDao::syncData2Java response:success
2020-03-06 00:13:29.137 [Debug]: PersonAttributeMongoDao::syncData2Java time statis use: 16
2020-03-06 00:13:29.137 [Debug]: single person total cost: 132------------
2020-03-06 00:13:29.137 [Debug]: SnapAlarmMongoDao::syncData2Java time statis type:5 use: 15
2020-03-06 00:13:29.137 [Information]: !!!!!!!!!!!!!!!!!!camera[269] catch person[] without mask!!!!!!!!!!!!!!!
2020-03-06 00:13:29.138 [Debug]: SnapAlarmMongoDao::syncData2Java time statis type:5 use: 15
2020-03-06 00:13:29.138 [Information]: !!!!!!!!!!!!!!!!!!camera[269] catch person[] without mask!!!!!!!!!!!!!!!
2020-03-06 00:13:29.146 [Debug]: PersonAttributeMongoDao::syncData2Java response:success
2020-03-06 00:13:29.146 [Debug]: PersonAttributeMongoDao::syncData2Java time statis use: 38
2020-03-06 00:13:29.146 [Debug]: single person total cost: 141------------
2020-03-06 00:13:29.148 [Debug]: PersonAttributeMongoDao::syncData2Java response:success
2020-03-06 00:13:29.148 [Debug]: PersonAttributeMongoDao::syncData2Java time statis use: 31

 需要從

      2020-03-06 00:13:29.134 [Debug]: SnapFaceMongoDao::syncData2Java all use: 19

      2020-03-06 00:13:29.137 [Debug]: PersonAttributeMongoDao::syncData2Java time statis use: 16

     2020-03-06 00:13:29.137 [Debug]: SnapAlarmMongoDao::syncData2Java time statis type:5 use: 15

     2020-03-06 00:13:29.212 [Debug]: SnapSceneMongoDao::syncSceneDataToJava time record id:404cb2245efc11ea9827ac1f6b947f3a use: 34 

     2020-03-06 00:13:29.252 [Debug]: SnapBodyMongoDao::syncData2Java use: 19 id:404cfaa45efc11eaa0a5ac1f6b947f3a

     這幾類日誌行中提取出時間戳和耗時(「use:」後面的數字),另外對每一類日誌分別累計計數

#!/usr/bin/python2.7
# -*- coding: UTF-8 -*-
import os
import re
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# str = '2020-03-05 12:52:29.272 [Debug]: SnapFaceMongoDao::syncData2Java all use: 16'
# pattern = re.compile(r'^2020-03-05 12:5[0-2].+$')
# match = pattern.match(str)
#
# if match:
#     print(match.group())

def findlog_by_time_indir(log_dir, pattern, out_file_name):
    """Collect the matching part of every log line under ``log_dir``.

    Every regular entry directly inside ``log_dir`` is read line by line.
    When ``pattern.match(line)`` succeeds, the matched text is echoed to
    stdout and written to ``out_file_name`` followed by a newline.

    Args:
        log_dir: Directory holding the log files; sub-directories are skipped.
        pattern: Compiled regular expression, applied with ``match`` (i.e.
            anchored at the start of each line).
        out_file_name: Path of the output file; it is truncated first.

    Returns:
        None. The result is the content of ``out_file_name``.
    """
    with open(out_file_name, 'w') as ofile:
        for file_name in os.listdir(log_dir):
            print(file_name)
            # os.listdir() yields bare names, so the directory test has to be
            # made on the joined path, not relative to the working directory.
            path = os.path.join(log_dir, file_name)
            if os.path.isdir(path):
                continue
            with open(path, "r") as f:
                for line in f:
                    match = pattern.match(line)
                    if match:
                        print(match.group())
                        ofile.write(match.group() + "\n")

def statistics_count(input_file, pattmap, out_count):
    """Build per-label series of ``[time, running count, cost]`` records.

    Each line of ``input_file`` is tested against the patterns in
    ``pattmap``; the first pattern that matches claims the line. Group 1 of
    the pattern must be a ``%Y-%m-%d %H:%M:%S.%f`` timestamp and group 2 an
    integer cost.

    Args:
        input_file: Path of the log file to read.
        pattmap: Mapping of label -> regular expression source string.
        out_count: Dict updated in place. ``out_count[label]`` becomes a list
            of ``[datetime, count, cost]`` rows, where ``count`` is one more
            than the count of the previous row for that label (starting at 1).

    Returns:
        None. Results are delivered through ``out_count``.
    """
    # Compile once up front instead of once per line and per pattern.
    compiled = [(key, re.compile(value)) for key, value in pattmap.items()]

    with open(input_file) as f:
        for line in f:
            for key, pattern in compiled:
                match = pattern.match(line)
                if not match:
                    continue
                cur_time = datetime.strptime(match.group(1),
                                             '%Y-%m-%d %H:%M:%S.%f')
                use_time = int(match.group(2))
                # setdefault replaces dict.has_key(), which Python 3 removed.
                records = out_count.setdefault(key, [])
                count = records[-1][1] + 1 if records else 1
                records.append([cur_time, count, use_time])
                # A line belongs to at most one label.
                break

def draw_time_count(out_count):
    """Plot the running count of every label against time and show it.

    Args:
        out_count: Mapping of label -> list of ``[time, count, cost]`` rows,
            as filled in by ``statistics_count``. Labels whose list is empty
            are skipped.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # plt.figure(figsize=(200,180), dpi=80)
    x_axis = plt.gca().xaxis
    x_axis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M'))  # date format of x tick labels
    x_axis.set_major_locator(mdates.MinuteLocator(interval=10))  # one major tick every 10 minutes
    plt.xlabel('time')
    plt.ylabel('count')

    last_value_map = {}
    # Earliest timestamp over all series; stays None when nothing is plotted,
    # so no bogus axis limit is applied to an empty chart.
    dtime_min = None
    for key in out_count:
        if not out_count[key]:
            continue
        plot_np_array = np.array(out_count[key])  # 2-D array so columns can be sliced
        xs = plot_np_array[:, 0]  # column 0: timestamps
        ys = plot_np_array[:, 1]  # column 1: running counts
        if dtime_min is None or xs[0] < dtime_min:
            dtime_min = xs[0]

        last_value_map[key] = (xs[-1], ys[-1])
        plt.plot(xs, ys, label=key)

    # Mark and label the final point of each series. Every label except
    # 'SnapScene' is shifted down by a growing offset (steps of 20000).
    yoffset = 0
    for key, value in last_value_map.items():
        if key != 'SnapScene':
            plt.scatter(value[0], value[1] - yoffset)  # marker at the (shifted) end point
            plt.annotate(key + ' ' + str(value[1]), (value[0], value[1]),
                         (value[0], value[1] - yoffset))  # text placed at the shifted position
            yoffset += 20000
        else:
            plt.scatter(value[0], value[1])
            plt.annotate(key + ' ' + str(value[1]), (value[0], value[1]))

    if dtime_min is not None:
        plt.xlim(xmin=dtime_min)  # start the x axis at the earliest sample
    plt.ylim(ymin=0)  # start the y axis at zero
    plt.gcf().autofmt_xdate()  # rotate the date labels on the x axis
    plt.legend(loc='upper left')
    plt.show()

def draw_time_cost(out_count):
    """Plot the cost value of every label against time and show the figure.

    Args:
        out_count: Mapping of label -> list of ``[time, count, cost]`` rows;
            column 0 is used for x and column 2 for y.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # plt.figure(figsize=(200,180), dpi=80)
    time_axis = plt.gca().xaxis
    time_axis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M'))  # date format of x tick labels
    time_axis.set_major_locator(mdates.MinuteLocator(interval=5))  # one major tick every 5 minutes
    plt.xlabel('time')
    plt.ylabel('cost time')

    for label in out_count:
        table = np.array(out_count[label])  # 2-D array so columns can be sliced
        timestamps, costs = table[:, 0], table[:, 2]
        plt.plot(timestamps, costs, label=label)

    plt.gcf().autofmt_xdate()  # rotate the date labels on the x axis
    plt.legend(loc='upper left')
    plt.show()

def main(log_path='/home/whl/seye-logs-21/log/KLMediaServer.log'):
    """Parse one log file and plot the cost-over-time chart.

    Args:
        log_path: Log file to analyse. Defaults to the path the script was
            originally written for.
    """
    # Optional pre-filter step: cut a time window out of a log directory.
    # findlog_by_time_indir("log", re.compile(r'2020-03-06 0[0-1]:\d\d.+$'), "log_bytime.txt")

    # label -> regex; group 1 is the timestamp, group 2 the cost value.
    # Raw strings keep '\[' and '\d' from being read as string escapes.
    labes_map = {
        'SnapFace': r'(.+) \[Debug\]: SnapFaceMongoDao::syncData2Java all use: (\d+)',
        'SnapBody': r'(.+) \[Debug\]: SnapBodyMongoDao::syncData2Java use: (\d+)',
        'SnapScene': r'(.+) \[Debug\]: SnapSceneMongoDao::syncSceneDataToJava time record id:.+ use: (\d+)',
        'SnapAlarm': r'(.+) \[Debug\]: SnapAlarmMongoDao::syncData2Java time statis type:\d use: (\d+)',
        'Attribute': r'(.+) \[Debug\]: PersonAttributeMongoDao::syncData2Java time statis use: (\d+)'
    }

    out_count = {}
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('statistics start')
    statistics_count(log_path, labes_map, out_count)

    # print('draw_time_count start')
    # draw_time_count(out_count)

    print('draw_time_cost start')
    draw_time_cost(out_count)

# Run the analysis only when this file is executed as a script.
if __name__ == '__main__':
    main()

     運行結果爲

 

 

 

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章