The data in Kafka here consists of custom-concatenated strings, so it has to be split apart in the Logstash filter; if the data is in JSON format, it is parsed automatically and no splitting is needed.
Sample below:
input {
    kafka {
        zk_connect => "bdc41.hexun.com:2181,bdc40.hexun.com:2181,bdc46.hexun.com:2181,bdc54.hexun.com:2181,bdc53.hexun.com:2181"
        group_id => "logstash"
        topic_id => "CyLog"
        reset_beginning => false # boolean (optional)
        consumer_threads => 3    # number (optional)
        decorate_events => true
    }
}
filter {
    # split the message on commas into named fields
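    # e.g. a raw message such as "1,2016-05-12 10:00:00,10.130.2.53" (hypothetical sample)
    # would yield showflag => "1", datetime => "2016-05-12 10:00:00", ip => "10.130.2.53"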
    ruby {
        init => "@kname = ['showflag','datetime','ip']"
        code => "event.append(Hash[@kname.zip(event['message'].split(/,/))])"
        # remove_field => ["message"]
    }
    # If the data contains Chinese characters, uncomment this to prevent garbled text;
    # note that if the incoming encoding is not consistent, this will raise errors.
    # urldecode {
    #     all_fields => true
    # }
}
output {
    elasticsearch {
        hosts => [ "10.130.2.53:9200", "10.130.2.46:9200", "10.130.2.54:9200" ]
        flush_size => 50000
        workers => 5
        index => "logstash-cylog"
    }
}
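For the JSON case mentioned at the top, no ruby split is needed: adding a json codec to the kafka input makes Logstash decode each message into event fields automatically. A minimal sketch, reusing the same ZooKeeper addresses and consumer group; the topic name CyLogJson is hypothetical:

input {
    kafka {
        zk_connect => "bdc41.hexun.com:2181,bdc40.hexun.com:2181,bdc46.hexun.com:2181,bdc54.hexun.com:2181,bdc53.hexun.com:2181"
        group_id => "logstash"
        topic_id => "CyLogJson" # hypothetical topic carrying JSON-formatted messages
        codec => "json"         # each Kafka message is parsed into event fields automatically
    }
}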