1、kafka 數據格式,tab 分隔
bucket_online_backends world 105102 1 540 0 2019-11-25 08:10:00.0 320 2019-11-25 07:37:15.0
bucket_online_logs version 1051189902 1 555 0 2019-11-25 08:10:00.0 46 661 2019-11-25 07:37:15.0
2、編寫配置文件
vim game-kafka-to-hdfs.conf
# #####################################################################
# DESC: Logstash configuration file. Typically forwarding logs to
# Elasticsearch instance.
# #####################################################################
input {
  kafka {
    bootstrap_servers => ["192.168.1.5:9092"]
    client_id => "backends"
    group_id => "backends"
    auto_offset_reset => "latest"
    consumer_threads => 5
    enable_auto_commit => true
    decorate_events => true
    max_poll_records => "1"
    # `topics` is an array-typed setting in the kafka input plugin.
    topics => ["t_bucket_online"]
    type => "_doc"
  }
}

filter {
  # Records are TAB-separated (see the sample data in the notes above);
  # field index 6 is the online timestamp "YYYY-MM-DD HH:mm:ss.S".
  # Splitting on a plain space would split the date and time into two
  # fields and shift every index after them, so split on the tab instead.
  # NOTE(review): "\t" is only interpreted as a tab when
  # `config.support_escapes: true` is set in logstash.yml; otherwise
  # replace it with a literal Tab character inside the quotes.
  if "bucket_online_logs" in [message] or "bucket_online_backends" in [message] {
    mutate {
      split => { "message" => "\t" }
      add_field => { "onlineTime" => "%{[message][6]}" }
    }
  } else {
    # Discard any record that is not one of the two known log types.
    drop {}
  }

  # onlineTime is "YYYY-MM-DD HH:mm:ss.S"; split on the inner space to
  # isolate the date part, which names the daily output file.
  mutate {
    split => ["onlineTime", " "]
    # Array-element references use the [field][index] syntax.
    add_field => ["onlineDate", "%{[onlineTime][0]}"]
    remove_field => ["onlineTime"]
  }

  # Re-assemble the original tab-separated record so what lands in HDFS
  # is identical to what was read from Kafka.
  mutate {
    join => ["message", "\t"]
  }
}

output {
  webhdfs {
    host => "namenode1-master"  # NameNode host
    port => "50070"             # WebHDFS port
    user => "root"              # HDFS user the writes are performed as
    path => "/home/game_online/raw/%{onlineDate}.txt"  # one file per day
    codec => plain {
      format => "%{message}"
    }
  }
}
3、運行任務
nohup /home/logstash/bin/logstash -f /home/logstash/config/game-kafka-to-hdfs.conf --path.data=/data/tmp/logstash/game-kafka-to-hdfs &
4、輸出內容與kafka輸入內容完全一致