The data in Kafka comes from Nginx access logs.
# Source data
192.168.1.123 - - [15/May/2020:21:47:39 +0800] "GET /nas/ma/q.gif?a=123&b=message&p=12345678901&b=p&timer=1589550459586 HTTP/1.1" 192.168.32.118:80 0.002 200
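For reference, an Nginx log_format along these lines would emit lines of this shape (a sketch; the actual directive in nginx.conf is not shown in the original, and the format name "main" is an assumption):

log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                '$upstream_addr $upstream_response_time $status';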
After collection, the message arrives in Kafka as:
{
  "@timestamp": "2020-05-15T13:47:43.216Z",
  "@metadata": {
    "beat": "filebeat",
    "type": "_doc",
    "version": "7.2.0",
    "topic": "bigdata_nas_access"
  },
  "message": "192.168.1.123 - - [15/May/2020:21:47:39 +0800] \"GET /nas/ma/q.gif?a=123&b=message&p=12345678901&b=p&timer=1589550459586 HTTP/1.1\" 192.168.32.118:80 0.002 200"
}
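The Filebeat side is not shown in the original; a minimal filebeat.yml sketch that would produce this output, assuming the default Nginx access log path, looks like:

filebeat.inputs:
- type: log
  paths:
    - /var/log/nginx/access.log   # assumed log location

output.kafka:
  hosts: ["192.168.1.68:9092", "192.168.1.69:9092", "192.168.1.67:9092"]
  topic: "bigdata_nas_access"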
Logstash pipeline configuration:
input {
  kafka {
    bootstrap_servers => "192.168.1.68:9092,192.168.1.69:9092,192.168.1.67:9092"
    client_id => "bigdata_88"
    group_id => "bigdata_nas_access_88"
    auto_offset_reset => "latest"       # start at the newest offsets when no committed offset exists
    consumer_threads => 3               # ideally matches the topic's partition count
    decorate_events => true             # expose Kafka metadata under [@metadata][kafka]
    topics => ["bigdata_nas_access"]
    type => "nas_access"
    codec => "json"                     # Filebeat ships each event as a JSON document
  }
}
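With decorate_events => true, the kafka input stores the source topic, partition, and offset under [@metadata][kafka]. This pipeline does not use them, but as a hypothetical example the Elasticsearch output below could name its index after the originating topic:

index => "%{[@metadata][kafka][topic]}"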
filter {
  if [type] == "nas_access" {
    # Parse the raw Nginx access line into named fields
    grok {
      match => {
        "message" => "%{IPORHOST:Client_IP} (%{WORD:ident}|-) (%{USERNAME:auth}|-) \[%{HTTPDATE:timestamp}\] \"%{WORD:Http_Method} %{URIPATHPARAM:Http_Request} HTTP/%{NUMBER:Http_Version}\" (?:%{HOSTPORT:upstream_addr}|-) (%{BASE16FLOAT:upstream_response_time}|-) (%{BASE10NUM:upstream_status}|-)"
      }
    }
  }
}
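Run against the sample line above, this grok pattern yields roughly the following fields (abbreviated; pre-existing fields such as message and @timestamp are omitted, and ident/auth are absent because both positions are "-" in the sample):

{
              "Client_IP" => "192.168.1.123",
              "timestamp" => "15/May/2020:21:47:39 +0800",
            "Http_Method" => "GET",
           "Http_Request" => "/nas/ma/q.gif?a=123&b=message&p=12345678901&b=p&timer=1589550459586",
           "Http_Version" => "1.1",
          "upstream_addr" => "192.168.32.118:80",
 "upstream_response_time" => "0.002",
        "upstream_status" => "200"
}

Note that @timestamp still reflects the time Filebeat read the line; a date filter would be needed to replace it with the parsed timestamp field.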
output {
  stdout { codec => rubydebug }         # print every event to the console for debugging
  if [type] == "nas_access" {
    elasticsearch {
      hosts => ["192.168.1.86:9200", "192.168.1.87:9200", "192.168.1.88:9200"]
      index => "nas_access"
      document_id => "%{Client_IP}"     # used as the unique document key
    }
  }
}
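To run the pipeline (the config filename here is an assumption):

bin/logstash -f nas_access.conf

One consequence of document_id => "%{Client_IP}" is that each client IP maps to exactly one document in the nas_access index: a later request from the same IP overwrites the earlier one rather than adding a new record. Drop document_id and let Elasticsearch generate ids if every request should be kept.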