logstash消費kafka的數據

kafka的數據來源於Nginx的日誌。

#源數據
192.168.1.123 - - [15/May/2020:21:47:39 +0800]  "GET /nas/ma/q.gif?a=123&b=message&p=12345678901&b=p&timer=1589550459586 HTTP/1.1" 192.168.32.118:80 0.002 200

採集後到達 kafka 的數據（filebeat 採集後發送到 kafka 的消息格式）：

{
    "@timestamp":"2020-05-15T13:47:43.216Z",
    "@metadata":{
        "beat":"filebeat",
        "type":"_doc",
        "version":"7.2.0",
        "topic":"bigdata_nas_access"
    },
    "message":"192.168.1.123 - - [15/May/2020:21:47:39 +0800]  "GET /nas/ma/q.gif?a=123&b=message&p=12345678901&b=p&timer=1589550459586 HTTP/1.1" 192.168.32.118:80 0.002 200"
}

logstash 腳本

input {
  kafka {
    # Kafka brokers to consume from.
    bootstrap_servers => ["192.168.1.68:9092,192.168.1.69:9092,192.168.1.67:9092"]
    client_id         => "bigdata_88"
    group_id          => "bigdata_nas_access_88"
    # Start from the newest offsets when the group has no committed offset.
    auto_offset_reset => "latest"
    consumer_threads  => 3
    # NOTE(review): decorate_events presumably attaches Kafka metadata
    # (topic/partition/offset) to each event — confirm against plugin docs.
    decorate_events   => true
    topics            => ["bigdata_nas_access"]
    # Tag events so the filter/output sections can route on [type].
    type              => "nas_access"
    # Incoming messages are JSON produced by filebeat.
    codec             => "json"
  }
}
# Parse the raw Nginx access-log line (carried in [message]) into fields.
# The pattern is coupled to the exact log format shown above, including the
# TWO spaces between the timestamp bracket and the quoted request.
filter {
  if [type] == "nas_access" {
        grok {
            match => {
                # Fields extracted, in order:
                #   Client_IP              - remote address
                #   ident / auth           - optional identity fields ("-" when absent)
                #   timestamp              - HTTPDATE, e.g. 15/May/2020:21:47:39 +0800
                #   Http_Method / Http_Request / Http_Version - request line
                #   upstream_addr          - host:port of the upstream ("-" when absent)
                #   upstream_response_time - float seconds ("-" when absent)
                #   upstream_status        - numeric status ("-" when absent)
                "message" => "%{IPORHOST:Client_IP} (%{WORD:ident}|-) (%{USERNAME:auth}|-) \[%{HTTPDATE:timestamp}\]  \"%{WORD:Http_Method} %{URIPATHPARAM:Http_Request} HTTP/%{NUMBER:Http_Version}\" (?:%{HOSTPORT:upstream_addr}|-) (%{BASE16FLOAT:upstream_response_time}|-) (%{BASE10NUM:upstream_status}|-)"
            }
        }
  }
}
output {
  # Debug: print every event (any type) to the console.
  stdout { codec => rubydebug }

  # Only parsed Nginx access events are indexed into Elasticsearch.
  if [type] == "nas_access" {
    elasticsearch {
      hosts => ["192.168.1.86:9200","192.168.1.87:9200","192.168.1.88:9200"]
      index => "nas_access"
      # Use the client IP as the document id: repeated events from the same
      # IP overwrite one document (deduplication by Client_IP).
      document_id => "%{Client_IP}"
    }
  }
}
# FIX: the original config never closed the output { } block (only the
# elasticsearch and if braces were closed), so the pipeline failed to load.
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章