logstash安装使用及使用filter处理日志示例

下载logstash(最好下载与es相同的版本,这里为了测试下载的低版本)

wget https://download.elastic.co/logstash/logstash/logstash-2.3.4.tar.gz

解压

tar -zxvf logstash-2.3.4.tar.gz

运行测试

# 使用标准输入输出
./logstash-2.3.4/bin/logstash -e 'input { stdin { } } output { stdout {} }'

# 使用标准输入输出,输出格式化为json
./logstash-2.3.4/bin/logstash -e 'input { stdin { } } output { stdout {codec => json} }'

# 加载配置文件启动
./logstash-2.3.4/bin/logstash -f logstash-simple.conf
# 加载多个配置文件启动
./logstash-2.3.4/bin/logstash -f ./conf/*

logstash模式

logstash 处理事件分三个阶段依次执行:输入(input)→ 过滤 filter(非必须)→ 输出(output)
(原文此处为配图,转载时图片缺失)

logstash配置文件

(原文此处为配图,转载时图片缺失)

宏观配置文件格式

# input section: where events come from
input {
  ...
}

# filter section: optional processing / transformation of events
filter {
  ...
}

# output section: where processed events are sent
output {
  ...
}

配置文件示例1:

# 参考 https://www.jianshu.com/p/25ed5ed46682
# https://doc.yonyoucloud.com/doc/logstash-best-practice-cn/filter/kv.html
# https://www.cnblogs.com/qq27271609/p/4762562.html

# 日志格式
# [log_time=2018-11-01 15:47:03] [level=ERROR] [app_name=logback_test] [version=1.0.0] [class=com.wk.logbackdemo.LogbackTest] test error


input {
        file {
                # Optional: attach extra metadata / tags to every event read here.
                #add_field => {"project_name" => "battleship"}
                #tags => "tag1"

                # Watch the per-level application log directories.
                path => ["/home/es-wk/logstash/logs/info/*.log","/home/es-wk/logstash/logs/debug/*.log","/home/es-wk/logstash/logs/warn/*.log","/home/es-wk/logstash/logs/error/*.log"]

                # Read each file from its beginning instead of tailing from the end.
                start_position => "beginning"

                # Disable the sincedb bookmark so files are re-read from the start.
                # Only effective when set before the file is first read; for testing only.
                sincedb_path => "/dev/null"

                # Multiline merge: log entries may span several lines (stack traces etc.).
                # The commented codec folds any line NOT starting with a timestamp into
                # the previous event.
#               codec => multiline {
#                       pattern => "^%{TIMESTAMP_ISO8601} "
#                       negate => true
#                       what => previous
#               }
        }
}

# 可写多个  内部顺序执行
filter {
		mutate {
				# 替换掉开头的[
				gsub => ["message", "\[", ""]
				# 根据]分割字段
				split => ["message", "] "]

				add_field => { "log_time" => "%{[message][0]}"}
				add_field => { "level" => "%{[message][1]}"}
				add_field => { "app_name" => "%{[message][2]}"}
				add_field => { "version" => "%{[message][3]}"}
				add_field => { "class" => "%{[message][4]}"}
				rename => ["host", "host_name"]
        }

		kv {
			#include_keys => ["log_time", "level", "version", "class"]
				field_split => "="
		}

		mutate {
				replace => {"message" => "%{[message][5]}"}
		}
		
		# 提取年份 月份 日期
		grok {
			match => ["log_time", "(?<YYYY>\d{4})-(?<MM>\d{1,2})-(?<DD>\d{1,2})"]
		}
}

output {
        elasticsearch{
			hosts => ["127.0.0.1:19200"]
			# One index per app per month.
			# NOTE(review): ES index names must be lowercase — make sure
			# app_name never contains uppercase characters, or lowercase it
			# in a mutate filter first.
			index => "%{app_name}-%{YYYY}-%{MM}"
			user => "elastic" 
			# Fixed typo: the X-Pack default password is "changeme", not "changme".
			password => "changeme"
		}
		# For debugging: also pretty-print every event to the console.
		stdout { codec => rubydebug }
}

配置文件示例2:

# 日志格式: 2018-12-14 16:49:21 [INFO] [TxId : Alien_axx21x003^1544669102193^46037 , SpanId : 2452398774611903875] com.dao.base.BaseDao.selectRowValueEqual selectRowValueEqual -->select * from ltemplate_style where  template_id = ? and type = ? order by create_time desc

# Read events from the console for interactive testing; the commented
# alternatives receive events from Filebeat (beats) or read log files directly.
input {
		stdin{
		}
#		beats {
#	    	port => "5044"     
#	    }	
#        file {
#                #add_field => {"project_name" => "battleship"}
#                #tags => "tag1"
#                path => ["/home/es-wk/logstash/logs/info/*.log","/home/es-wk/logstash/logs/debug/*.log","/home/es-wk/logstash/logs/warn/*.log","/home/es-wk/logstash/logs/error/*.log"]
#                start_position => beginning
#				sincedb_path => "/dev/null" # re-read from start; only effective on first read of a file; testing only
#				# Multiline merge: fold lines not starting with a timestamp into the previous event
#				codec => multiline {
#					pattern => "^%{TIMESTAMP_ISO8601} "
#					negate => true
#					what => previous
#               }
#        }
}

# 可写多个  内部顺序执行
filter {
        # 正则提取数据 ruby的正则
        grok {
            match => {"message" => "%{TIMESTAMP_ISO8601:log_time}"}
        }
         grok { 
			match => { "message" => "(?<level>(?<=\[)\w+(?=\]))" }
        }
        grok {
            match => {"message" => "(?<tx_id>(?<=\[TxId\s:\s)\S+(?=\s,))"}
        }
        grok {
            match => {"message" => "(?<span_id>(?<=,\sSpanId\s:\s)\S+(?=\s*\]))"}
        }
		grok {
			match => {"message" => "(?<message>(?<=[0-9]\]\s)[\s\S]*(?=$))"}
		}
        # message只保留提示信息
		mutate {
				replace => {"message" => "%{[message][1]}"}
		}
		
		# 提取年份 月份 日期
		grok {
			match => ["log_time", "(?<YYYY>\d{4})-(?<MM>\d{1,2})-(?<DD>\d{1,2})"]
		}
}

output {
        # Uncomment to ship events to Elasticsearch.
        #elasticsearch{
		#	hosts => ["127.0.0.1:19200"]
		#	index => "log-%{YYYY}-%{MM}-%{app_name}"
		#	user => "elastic" 
		#	password => "changme"
		#}
		# For debugging: pretty-print every event to the console.
		stdout { codec => rubydebug }
}

示例3:(任意数量[k=v]格式日志清洗)

input {
  # Receive events from Filebeat over the Beats protocol.
  beats {
    port => 5044
  }
  # Alternative for interactive testing:
  # stdin{}
}

#[log_time=$time_iso8601] [real_ip=$http_x_forwarded_for] [client_ip=$remote_addr]	[status=$status] [request_time=$request_time] [uri=$request_uri] [referer=$http_referer][request=$request] [upstream_addr=$upstream_addr] [upstream_status=$upstream_status] [upstream_response_time=$upstream_response_time] [agent=$agent] [request_length=$request_length] [bytes_sent=$bytes_sent]  


# docker run --rm -it -v /Users/wk/Desktop/logstash-online.conf:/usr/share/logstash/pipeline/logstash.conf docker.elastic.co/logstash/logstash:6.4.2
# [timestamp=2019-02-28 10:24:05.981] [level=DEBUG] [tx_id=] [span_id=] [bu_id=AXX] [app_id=AXX001] [msg=[xxx=111]] xxx=1111

# [timestamp=2019-03-12 14:13:09.267] [level=WARN] [tx_id=] [span_id=] [bu_id=AXX] [app_id=AXX001] Sync config from upstream redis.basic?ip=131 [Cause: Could not complete get operation [Cause: Read timed out]]

# Cleans log lines made of an arbitrary number of "[key=value]" pairs
# followed by optional free text (see samples above).
filter {
  	grok {
        # Capture the leading run of bracketed groups into message_fmt.
        # The lazy "+?" keeps each "[...]" group minimal, so free text after
        # the last "]" is excluded (the greedy variant below would swallow it).
    #     match => {"message" => "(?<message_fmt>((\[[\s\S]+\] )+))"}
        match => {"message" => "(?<message_fmt>((\[[\s\S]+?\] *)+))"}
    }
    kv {
        # Field holding the k=v pairs
        source => "message_fmt"
        # Pattern separating one pair from the next
        field_split_pattern => "\] "
        # Separator between key and value
        value_split => "="
        # Strip surrounding brackets from keys and values
        trim_key => "\[\]"
        trim_value => "\[\]"
        # Do not recurse into values
        recursive => "false"
        # Treat bracketed values as plain text
        include_brackets => "false"
        # Keep a single value when a key repeats (no arrays)
        allow_duplicate_values => false
    }
    # Use the parsed timestamp as the event's @timestamp.
    date {
	    match => ["timestamp", "yyyy-MM-dd HH:mm:ss.SSS"]
	  }
    mutate {
      lowercase => ["app_id"]
      # Drop fields no longer needed: "timestamp" (now in @timestamp) plus
      # Filebeat bookkeeping fields. Consolidated into one array; the original
      # had a stray `remove_field => "%{}"` (invalid sprintf, removes nothing
      # useful) and listed "prospector" twice.
      remove_field => ["timestamp", "tags", "prospector", "input", "offset"]
      # Optionally also drop these:
      #  remove_field => ["message_fmt", "host"]
	  }
}

output {
  # Debug output: pretty-print every event to the console.
  stdout {
    codec => rubydebug
  }
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章