[Log Processing, Part 3] Collecting DB2 logs with Flume, pushing them to Kafka, and pulling the topic's logs with Spark Streaming

This post implements pushing DB2 logs into Kafka via flume-ng, subscribing to the corresponding Kafka topic with Spark Streaming, and emailing the selected messages, for example those whose LEVEL is Warning, to a designated mailbox.
1. Push DB2 logs into Kafka via flume-ng
    Following the configuration in summary1, the configuration used in this experiment is as follows:
case-flume-kafka.conf:
agent.sources=source1
agent.channels=channel1
agent.sinks=sink1


agent.sources.source1.type = exec
# to follow the live db2diag.log, use the tail -F command instead of the one-shot cat below
#agent.sources.source1.command = tail -F /db2fs/home/db2inst1/sqllib/db2dump/db2diag.log
agent.sources.source1.command = cat /db2fs/opt/log-process/share/db2.log


agent.channels.channel1.type=memory
agent.channels.channel1.capacity=1000
agent.channels.channel1.transactionCapacity=100


agent.sources.source1.channels=channel1
agent.sinks.sink1.channel=channel1


agent.sinks.sink1.type=org.apache.flume.sink.kafka.KafkaSink
agent.sinks.sink1.topic=streamtest
agent.sinks.sink1.brokerList=9.115.42.108:9092
agent.sinks.sink1.requiredAcks=1
agent.sinks.sink1.batchSize=20
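With the file in place, start the agent with flume-ng; the conf directory and file location below are assumptions about the local layout:
bin/flume-ng agent --conf conf --conf-file conf/case-flume-kafka.conf --name agent -Dflume.root.logger=INFO,console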
2. Start ZooKeeper and Kafka as described in summary1
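For reference, a minimal sequence from the Kafka installation directory (assuming the bundled single-node ZooKeeper and the default config files; the topic matches the streamtest topic configured above):
bin/zookeeper-server-start.sh config/zookeeper.properties &
bin/kafka-server-start.sh config/server.properties &
bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic streamtest
# sanity check: messages should appear here once the flume agent is running
bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic streamtest --from-beginning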
3. Start Spark and submit the program via spark-submit, taking care to include every jar it depends on. Note that options such as --class must come before the application jar; anything placed after the jar is passed to the program as an argument:
./bin/spark-submit --master spark://9.115.42.108:7077 --class logstream.JavaKafkaMessageExtract --jars /db2fs/opt/log-process/share/spark-streaming-kafka_2.10-1.3.1.jar,/db2fs/opt/log-process/share/javamail-1.4.7/mail.jar,/db2fs/opt/log-process/share/kafka_2.10-0.8.1.1.jar,/db2fs/opt/log-process/share/metrics-core-2.2.0.jar /db2fs/opt/log-process/myprogram/zhangbo-streaming_1.0-0.0.1-SNAPSHOT.jar
4. Use Spark Streaming to process the messages pulled from Kafka; the code is as follows:
package logstream;


import java.io.FileWriter;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


import scala.Tuple2;
import kafka.serializer.StringDecoder;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.function.*;
import org.apache.spark.streaming.api.java.*;
import org.apache.spark.streaming.kafka.KafkaUtils;
import org.apache.spark.streaming.Durations;


import java.util.Properties;
import javax.mail.Address;
import javax.mail.MessagingException;
import javax.mail.Session;
import javax.mail.Message;
import javax.mail.Transport;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMessage;


public class JavaKafkaMessageExtract {

    // Matches the LEVEL field of a flattened db2diag entry, where the level name
    // (e.g. "Warning") runs directly into the following PID field.
    private static Pattern pattern = Pattern.compile("LEVEL:\\s([A-Za-z]+)PID");

    public static void main(String[] args) throws MessagingException {

            String brokers = "X.X.X.X:9092";
            String topics = "streamtest";

            // Create context with 2 second batch interval
            SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaMessageExtract");
            JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(2));


            HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(topics.split(",")));
            HashMap<String, String> kafkaParams = new HashMap<String, String>();
            kafkaParams.put("metadata.broker.list", brokers);


            // Create direct kafka stream with brokers and topics
            JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(
                jssc,
                String.class,
                String.class,
                StringDecoder.class,
                StringDecoder.class,
                kafkaParams,
                topicsSet
            );
           
            // For each micro-batch, scan every (key, message) pair; when a
            // Warning-level entry is found, log the level locally and email the
            // full message.
            messages.foreachRDD(new Function<JavaPairRDD<String, String>, Void>() {
                public Void call(JavaPairRDD<String, String> v1) throws Exception {
                    v1.foreach(new VoidFunction<Tuple2<String, String>>() {
                        public void call(Tuple2<String, String> tuple2) {
                            try {
                                Matcher m = pattern.matcher(tuple2._2());
                                if (m.find() && m.group(1).equals("Warning")) {
                                    // append the extracted level to a local debug file
                                    FileWriter writer = new FileWriter("/db2fs/opt/log-process/myprogram/debug.txt", true);
                                    writer.write(m.group(1) + "\n");
                                    sendmail(m.group(1), tuple2._2());
                                    writer.close();
                                }
                            } catch (IOException e) {
                                e.printStackTrace();
                            } catch (MessagingException e) {
                                e.printStackTrace();
                            }
                        }
                    });
                    return null;
                }
            });
            jssc.start();
            jssc.awaitTermination();
    }
   
    // Sends the extracted level and the full log line by email over SMTPS.
    // "username"/"password" and the 163.com addresses below are placeholders.
    private static void sendmail(String eventLevel, String message) throws MessagingException {
        Properties props = new Properties(); 
        props.setProperty("mail.debug", "true");   
        props.setProperty("mail.smtp.auth", "true"); 
        props.setProperty("mail.smtp.port", "465"); 
        props.setProperty("mail.host", "smtp.163.com"); 
        props.setProperty("mail.smtp.ssl.enable", "true");
        props.setProperty("mail.transport.protocol", "smtp");
        props.put("mail.smtp.socketFactory.class","javax.net.ssl.SSLSocketFactory");
        props.put("mail.smtp.socketFactory.fallback", "false");
        props.setProperty("mail.smtp.quitwait", "false");


        Session session = Session.getInstance(props);     
        Message msg = new MimeMessage(session); 
        msg.setSubject("["+eventLevel+"]"); 
        msg.setText(message); 
        msg.setFrom(new InternetAddress("[email protected]")); 
         
        Transport transport = session.getTransport(); 


        transport.connect("username", "password"); 


        transport.sendMessage(msg, new Address[] {new InternetAddress("[email protected]")}); 
   
        transport.close();
    }
}
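While the job runs, each matched level is appended to the debug file on whichever worker processed the partition; a quick way to watch it (path taken from the code above):
tail -f /db2fs/opt/log-process/myprogram/debug.txt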
5. Now, whenever a Warning-level entry appears in the log, the [email protected] mailbox receives an email containing the message
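To trigger this path by hand, one option is to append a synthetic line that the LEVEL pattern matches; the line below is made up, and since the exec source uses a one-shot cat, the agent has to be restarted to re-read the file:
echo 'LEVEL: WarningPID : 12345' >> /db2fs/opt/log-process/share/db2.log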
6. Use Flume to subscribe to the messages in Kafka and write them to Elasticsearch. The parsing must be done on the consumer side; parsing on the producer side before pushing into Kafka is useless
Note in particular that Flume acting as a consumer is managed by ZooKeeper, so Flume's lib directory must contain the ZooKeeper jar, otherwise the agent fails with an error
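For example, copying it from the Kafka distribution, where the paths are assumptions about the local install:
cp $KAFKA_HOME/libs/zookeeper-*.jar $FLUME_HOME/lib/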
The configuration file is as follows:
case-kafka-flume-elastic.conf:
agent.sources=source1
agent.channels=channel1
agent.sinks=sink1


agent.sources.source1.type = org.apache.flume.source.kafka.KafkaSource
agent.sources.source1.zookeeperConnect = localhost:2181
agent.sources.source1.topic = streamtest
agent.sources.source1.groupId = flume
agent.sources.source1.kafka.consumer.timeout.ms=100


agent.sources.source1.interceptors = i1 i2 i3 i4 i5 i6 i7 i8 i9 i10 i11 i12 i13 i14 i15 i16


agent.sources.source1.interceptors.i1.type=regex_extractor
agent.sources.source1.interceptors.i1.regex =([0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{2}.[0-9]{2}.[0-9]{2}.[0-9]{6}[+][0-9]{3})\\s+
agent.sources.source1.interceptors.i1.serializers=s1
agent.sources.source1.interceptors.i1.serializers.s1.name=TIME


agent.sources.source1.interceptors.i2.type=regex_extractor
agent.sources.source1.interceptors.i2.regex =\\s+([A-Z0-9]+)\\s+LEVEL
agent.sources.source1.interceptors.i2.serializers=s1
agent.sources.source1.interceptors.i2.serializers.s1.name=ID


agent.sources.source1.interceptors.i3.type=regex_extractor
agent.sources.source1.interceptors.i3.regex =LEVEL:\\s([A-Z][a-z]+)PID
agent.sources.source1.interceptors.i3.serializers=s1
agent.sources.source1.interceptors.i3.serializers.s1.name=LEVEL


agent.sources.source1.interceptors.i4.type=regex_extractor
agent.sources.source1.interceptors.i4.regex =PID\\s+:\\s+([0-9]{1,5})\\s+TID
agent.sources.source1.interceptors.i4.serializers=s1
agent.sources.source1.interceptors.i4.serializers.s1.name=PID


agent.sources.source1.interceptors.i5.type=regex_extractor
agent.sources.source1.interceptors.i5.regex =TID\\s+:\\s+([0-9]{15})\\s+PROC
agent.sources.source1.interceptors.i5.serializers=s1
agent.sources.source1.interceptors.i5.serializers.s1.name=TID


agent.sources.source1.interceptors.i6.type=regex_extractor
agent.sources.source1.interceptors.i6.regex =PROC\\s+:\\s+([a-z0-9]+\\s*[0-9]*)
agent.sources.source1.interceptors.i6.serializers=s1
agent.sources.source1.interceptors.i6.serializers.s1.name=PROC


agent.sources.source1.interceptors.i7.type=regex_extractor
agent.sources.source1.interceptors.i7.regex =INSTANCE:\\s+([a-z0-9]+)\\s+
agent.sources.source1.interceptors.i7.serializers=s1
agent.sources.source1.interceptors.i7.serializers.s1.name=INSTANCE


agent.sources.source1.interceptors.i8.type=regex_extractor
agent.sources.source1.interceptors.i8.regex =NODE\\s+:\\s([0-9]{3})
agent.sources.source1.interceptors.i8.serializers=s1
agent.sources.source1.interceptors.i8.serializers.s1.name=NODE


agent.sources.source1.interceptors.i9.type=regex_extractor
agent.sources.source1.interceptors.i9.regex =NODE\\s+:\\s[0-9]{3}\\s*DB\\s+:\\s+([A-Z]{4})
agent.sources.source1.interceptors.i9.serializers=s1
agent.sources.source1.interceptors.i9.serializers.s1.name=DB


agent.sources.source1.interceptors.i10.type=regex_extractor
agent.sources.source1.interceptors.i10.regex =APPHDL\\s+:\\s+([0-9]+-[0-9]+)\\s*
agent.sources.source1.interceptors.i10.serializers=s1
agent.sources.source1.interceptors.i10.serializers.s1.name=APPHDL


agent.sources.source1.interceptors.i11.type=regex_extractor
agent.sources.source1.interceptors.i11.regex =APPID:\\s+(.[A-Z]+\\.[0-9a-z]+\\.[0-9]+)\\s*
agent.sources.source1.interceptors.i11.serializers=s1
agent.sources.source1.interceptors.i11.serializers.s1.name=APPID


agent.sources.source1.interceptors.i12.type=regex_extractor
agent.sources.source1.interceptors.i12.regex =AUTHID\\s+:\\s+([0-9A-Z]{7}[0-9])\\s+
agent.sources.source1.interceptors.i12.serializers=s1
agent.sources.source1.interceptors.i12.serializers.s1.name=AUTHID


agent.sources.source1.interceptors.i13.type=regex_extractor
agent.sources.source1.interceptors.i13.regex =HOSTNAME:\\s+([a-zA-Z0-9.]+)[A-Z]+
agent.sources.source1.interceptors.i13.serializers=s1
agent.sources.source1.interceptors.i13.serializers.s1.name=HOSTNAME


agent.sources.source1.interceptors.i14.type=regex_extractor
agent.sources.source1.interceptors.i14.regex =EDUID\\s+:\\s+([0-9]+)
agent.sources.source1.interceptors.i14.serializers=s1
agent.sources.source1.interceptors.i14.serializers.s1.name=EDUID


agent.sources.source1.interceptors.i15.type=regex_extractor
agent.sources.source1.interceptors.i15.regex =EDUNAME:\\s+([a-z0-9]+\\s*[()a-zA-Z]*\\s0)\\s*[A-Z]+
agent.sources.source1.interceptors.i15.serializers=s1
agent.sources.source1.interceptors.i15.serializers.s1.name=EDUNAME


agent.sources.source1.interceptors.i16.type=regex_extractor
agent.sources.source1.interceptors.i16.regex =FUNCTION:\\s+(DB2\\s+[A-Z]+,[ a-zA-Z]+,[ a-zA-Z_/:]+,\\sprobe:[0-9]+)
agent.sources.source1.interceptors.i16.serializers=s1
agent.sources.source1.interceptors.i16.serializers.s1.name=FUNCTION


agent.channels.channel1.type=memory
agent.channels.channel1.capacity=1000
agent.channels.channel1.transactionCapacity=100


agent.sources.source1.channels=channel1
agent.sinks.sink1.channel=channel1


#agent.sinks.sink1.type=logger
agent.sinks.sink1.type=org.apache.flume.sink.elasticsearch.ElasticSearchSink
agent.sinks.sink1.batchSize=100
agent.sinks.sink1.hostNames=9.115.42.108:9300
agent.sinks.sink1.indexName=flume-db2
agent.sinks.sink1.indexType=bar_type
agent.sinks.sink1.clusterName=elasticsearch
agent.sinks.sink1.serializer=org.apache.flume.sink.elasticsearch.ElasticSearchLogStashEventSerializer
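Start this agent with flume-ng just as in step 1, then confirm that documents are reaching Elasticsearch. A quick check, assuming the REST API is on the default port 9200 (the sink writes to daily indices named flume-db2-yyyy-MM-dd):
bin/flume-ng agent --conf conf --conf-file conf/case-kafka-flume-elastic.conf --name agent
curl 'http://9.115.42.108:9200/flume-db2*/_search?pretty&size=1'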




7. Start Kibana and the subscribed messages will be visible


The experiment validated two pipelines end to end:
    flume--kafka--spark--EventWarning
    flume--kafka--flume--elasticsearch--kibana

