(3)Storm實時日誌分析實戰--編碼實現

  1. LogParserBolt類

    package com.ibeifeng.bigdata.storm.weglog;
    
    import backtype.storm.task.OutputCollector;
    import backtype.storm.task.TopologyContext;
    import backtype.storm.topology.IBasicBolt;
    import backtype.storm.topology.IRichBolt;
    import backtype.storm.topology.OutputFieldsDeclarer;
    import backtype.storm.tuple.Fields;
    import backtype.storm.tuple.Tuple;
    import backtype.storm.tuple.Values;
    
    import java.text.DateFormat;
    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.Map;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    import static com.ibeifeng.bigdata.storm.weglog.WebLogConstants.*;
    /**
     * 日誌解析類
     * Created by ad on 2016/12/17.
     */
    /**
     * Parses one raw nginx access-log line into (day, hour, minute, kpi) tuples
     * and fans them out on per-KPI streams (IP / URL / referer / user-agent).
     */
    public class LogParserBolt implements IRichBolt {

        // Log-line pattern; compiled once in prepare(), reused for every tuple.
        private Pattern pattern;

        // Per-executor formatter; safe as an instance field because execute()
        // is invoked by a single thread per bolt instance.
        private DateFormat df;

        private OutputCollector collector;

        @Override
        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
            pattern = Pattern.compile("([^ ]*) [^ ]* [^ ]* \\[([\\d+]*)\\] \\\"[^ ]* ([^ ]*) [^ ]*\\\" \\d{3} \\d+ \\\"([^\"]*)\\\" \\\"([^\"]*)\\\" \\\"[^ ]*\\\"");
            // Hoisted out of execute(): the original allocated a SimpleDateFormat per tuple.
            df = new SimpleDateFormat("yyyyMMddHHmm");
            this.collector = collector;
        }

        @Override
        public void execute(Tuple input) {
            String webLog = input.getStringByField("str");

            // FIX: the original condition was "webLog != null || !"".equals(webLog)",
            // which is always true (for null, "".equals(null) is false, so the right
            // side is true) and let null lines reach pattern.matcher() -> NPE.
            if (webLog != null && !"".equals(webLog)) {

                Matcher matcher = pattern.matcher(webLog);
                if (matcher.find()) {
                    String ip = matcher.group(1);
                    String serverTimeStr = matcher.group(2);

                    // Group 2 is an epoch-millis timestamp (digits only per the regex).
                    long timestamp = Long.parseLong(serverTimeStr);
                    Date date = new Date();
                    date.setTime(timestamp);

                    String dateStr = df.format(date);
                    String day = dateStr.substring(0, 8);    // yyyyMMdd
                    String hour = dateStr.substring(0, 10);  // yyyyMMddHH
                    String minute = dateStr;                 // yyyyMMddHHmm

                    String requestUrl = matcher.group(3);
                    String httpRefer = matcher.group(4);
                    String userAgent = matcher.group(5);

                    // Fan out one tuple per KPI stream, anchored to the input tuple
                    // so downstream failures trigger a replay.
                    this.collector.emit(IP_COUNT_STREAM, input, new Values(day, hour, minute, ip));
                    this.collector.emit(URL_PARSER_STREAM, input, new Values(day, hour, minute, requestUrl));
                    this.collector.emit(HTTPREFER_PARSER_STREAM, input, new Values(day, hour, minute, httpRefer));
                    this.collector.emit(USERAGENT_PARSER_STREAM, input, new Values(day, hour, minute, userAgent));
                }
            }

            // Always ack, even for unparseable lines, so they are not replayed forever.
            this.collector.ack(input);
        }

        @Override
        public void cleanup() {

        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declareStream(IP_COUNT_STREAM, new Fields(DAY, HOUR, MINUTE, IP));
            declarer.declareStream(URL_PARSER_STREAM, new Fields(DAY, HOUR, MINUTE, REQUEST_URL));
            declarer.declareStream(HTTPREFER_PARSER_STREAM, new Fields(DAY, HOUR, MINUTE, HTTP_REFER));
            declarer.declareStream(USERAGENT_PARSER_STREAM, new Fields(DAY, HOUR, MINUTE, USERAGENT));
        }

        @Override
        public Map<String, Object> getComponentConfiguration() {
            return null;
        }
    }
    
  2. UserAgentParserBolt類

    package com.ibeifeng.bigdata.storm.weglog;
    
    import backtype.storm.task.OutputCollector;
    import backtype.storm.task.TopologyContext;
    import backtype.storm.topology.IBasicBolt;
    import backtype.storm.topology.IRichBolt;
    import backtype.storm.topology.OutputFieldsDeclarer;
    import backtype.storm.tuple.Fields;
    import backtype.storm.tuple.Tuple;
    import backtype.storm.tuple.Values;
    import com.ibeifeng.bigdata.storm.util.UserAgentUtil;
    
    import static com.ibeifeng.bigdata.storm.weglog.WebLogConstants.*;
    
    import java.util.Map;
    
    /**
     * 解析UserAgent
     * Created by ad on 2016/12/18.
     */
    /**
     * Splits a raw user-agent string into browser and OS KPI tuples and
     * emits them on the browser/OS count streams.
     */
    public class UserAgentParserBolt implements IRichBolt {

        private OutputCollector _collector;

        @Override
        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
            this._collector = collector;
        }

        @Override
        public void execute(Tuple input) {
            final String day = input.getStringByField(DAY);
            final String hour = input.getStringByField(HOUR);
            final String minute = input.getStringByField(MINUTE);
            final String userAgent = input.getStringByField(USERAGENT);

            if (hasText(userAgent)) {
                UserAgentUtil.UserAgentInfo info = UserAgentUtil.analyticUserAgent(userAgent);
                if (info != null) {
                    // Browser KPIs: bare name, then name_version when available.
                    emitNameAndVersion(input, BROWSER_COUNT_STREAM, day, hour, minute,
                            info.getBrowserName(), info.getBrowserVersion());
                    // OS KPIs: same shape on the OS stream.
                    emitNameAndVersion(input, OS_COUNT_STREAM, day, hour, minute,
                            info.getOsName(), info.getOsVersion());
                }
            }

            this._collector.ack(input);
        }

        // Emits (day, hour, minute, name) and, when a version is present,
        // also (day, hour, minute, name_version) on the given stream.
        private void emitNameAndVersion(Tuple anchor, String streamId, String day,
                                        String hour, String minute,
                                        String name, String version) {
            if (!hasText(name)) {
                return;
            }
            this._collector.emit(streamId, anchor, new Values(day, hour, minute, name));
            if (hasText(version)) {
                this._collector.emit(streamId, anchor,
                        new Values(day, hour, minute, name + "_" + version));
            }
        }

        // True when s is neither null nor the empty string.
        private static boolean hasText(String s) {
            return s != null && !"".equals(s);
        }

        @Override
        public void cleanup() {

        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declareStream(BROWSER_COUNT_STREAM, new Fields(DAY, HOUR, MINUTE, BROWSER));
            declarer.declareStream(OS_COUNT_STREAM, new Fields(DAY, HOUR, MINUTE, OS));
        }

        @Override
        public Map<String, Object> getComponentConfiguration() {
            return null;
        }
    }
    
  3. CountKpiBolt類

    package com.ibeifeng.bigdata.storm.weglog;
    
    import backtype.storm.task.OutputCollector;
    import backtype.storm.task.TopologyContext;
    import backtype.storm.topology.IRichBolt;
    import backtype.storm.topology.OutputFieldsDeclarer;
    import backtype.storm.tuple.Fields;
    import backtype.storm.tuple.Tuple;
    import backtype.storm.tuple.Values;
    
    import java.util.HashMap;
    import java.util.Iterator;
    import java.util.Map;
    import static com.ibeifeng.bigdata.storm.weglog.WebLogConstants.*;
    
    /**
     * 通用的計數Bolt
     * Created by ad on 2016/12/17.
     */
    /**
     * Generic counting bolt: maintains running day/hour/minute counters for one
     * KPI type and emits the updated totals on the default stream.
     */
    public class CountKpiBolt implements IRichBolt {

        /** KPI-type prefix (e.g. "I" for IP, "B" for browser) prepended to emitted keys. */
        private String kpiType;

        // TODO optimization: replace with an in-memory store such as Redis —
        // this map grows with key cardinality and is lost on worker restart.
        private Map<String, Integer> kpiCounts;

        /** Day (yyyyMMdd) of the last processed tuple; drives the rollover purge. */
        private String currentDay = "";

        private OutputCollector _collector;

        public CountKpiBolt(String kpiType) {
            this.kpiType = kpiType;
        }

        @Override
        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
            this.kpiCounts = new HashMap<>();
            this._collector = collector;
        }

        @Override
        public void execute(Tuple input) {
            // FIX (consistency): read fields via the shared constants instead of the
            // string literals "day"/"hour"/"minute" used elsewhere in this file.
            String day = input.getStringByField(DAY);
            String hour = input.getStringByField(HOUR);
            String minute = input.getStringByField(MINUTE);
            // The KPI value is always the 4th field, regardless of its name per stream.
            String kpi = input.getString(3);

            String kpiByDay = day + "_" + kpi;
            String kpiByHour = hour + "_" + kpi;
            String kpiByMinute = minute + "_" + kpi;

            // Day rollover: purge every counter from the previous day. The hour and
            // minute keys also start with the yyyyMMdd prefix, so one pass clears all.
            if (!currentDay.equals(day)) {
                Iterator<Map.Entry<String, Integer>> iter = kpiCounts.entrySet().iterator();
                while (iter.hasNext()) {
                    Map.Entry<String, Integer> entry = iter.next();
                    if (entry.getKey().startsWith(currentDay)) {
                        iter.remove();
                    }
                }
            }

            currentDay = day;

            int kpiCountByDay = increment(kpiByDay);
            int kpiCountByHour = increment(kpiByHour);
            int kpiCountByMinute = increment(kpiByMinute);

            // Emit running totals on the default stream, anchored for replay.
            this._collector.emit(input, new Values(kpiType + "_" + kpiByDay, kpiCountByDay));
            this._collector.emit(input, new Values(kpiType + "_" + kpiByHour, kpiCountByHour));
            this._collector.emit(input, new Values(kpiType + "_" + kpiByMinute, kpiCountByMinute));

            this._collector.ack(input);
        }

        /** Increments the counter for {@code key} (starting at 0) and returns the new value. */
        private int increment(String key) {
            Integer current = kpiCounts.get(key);
            int next = (current == null) ? 1 : current + 1;
            kpiCounts.put(key, next);
            return next;
        }

        @Override
        public void cleanup() {

        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields(SERVERTIME_KPI, KPI_COUNTS));
        }

        @Override
        public Map<String, Object> getComponentConfiguration() {
            return null;
        }
    }
    
  4. SaveBolt類

    ```
    package com.ibeifeng.bigdata.storm.weglog;
    
    import backtype.storm.task.OutputCollector;
    import backtype.storm.task.TopologyContext;
    import backtype.storm.topology.IBasicBolt;
    import backtype.storm.topology.IRichBolt;
    import backtype.storm.topology.OutputFieldsDeclarer;
    import backtype.storm.tuple.Tuple;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
    import org.apache.hadoop.hbase.util.Bytes;
    
    import java.io.IOException;
    import java.io.InterruptedIOException;
    import java.util.Map;
    
    import static  com.ibeifeng.bigdata.storm.weglog.WebLogConstants.*;
    /**
     * 將數據存儲到HBase數據庫中
     * Created by ad on 2016/12/17.
     */
    /**
     * Persists KPI counters to HBase: row key = "type_period_kpi",
     * column = info:&lt;type prefix&gt;, value = the running count.
     */
    public class SaveBolt implements IRichBolt {

        private HTable table;

        private OutputCollector _collector;

        @Override
        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
            Configuration configuration = HBaseConfiguration.create();
            try {
                table = new HTable(configuration, HBASE_TABLENAME);
            } catch (IOException e) {
                // Fail fast: the bolt is useless without its table handle.
                e.printStackTrace();
                throw new RuntimeException(e);
            }

            this._collector = collector;
        }

        @Override
        public void execute(Tuple input) {

            String rowKey = input.getStringByField(SERVERTIME_KPI);
            Integer count = input.getIntegerByField(KPI_COUNTS);
            System.err.println("serverTimeAndKpi=" + rowKey + ", kpiCounts=" + count);

            if (rowKey != null && count != null) {
                // The column qualifier is the KPI-type prefix (text before the first '_').
                String columnQualifier = rowKey.split("_")[0];

                Put put = new Put(Bytes.toBytes(rowKey));
                put.add(Bytes.toBytes(COLUMN_FAMILY),
                        Bytes.toBytes(columnQualifier), Bytes.toBytes("" + count));

                try {
                    table.put(put);
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }

            this._collector.ack(input);
        }

        @Override
        public void cleanup() {
            // Release the HBase connection when the worker shuts down.
            if (table != null) {
                try {
                    table.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {

        }

        @Override
        public Map<String, Object> getComponentConfiguration() {
            return null;
        }
    }
    
    ```
    
  5. 常量類WebLogConstants

    package com.ibeifeng.bigdata.storm.weglog;
    
    /**
     * 常量類
     * Created by ad on 2016/12/17.
     */
    /**
     * Shared constants for the web-log analysis topology: component ids,
     * stream ids, tuple field names, KPI type prefixes and HBase names.
     */
    public final class WebLogConstants {

        // Utility holder: prevent instantiation.
        private WebLogConstants() {
        }

        // Topology component ids
        public static final String KAFKA_SPOUT_ID = "kafkaSpoutId";
        public static final String WEB_LOG_PARSER_BOLT = "webLogParserBolt";
        public static final String COUNT_IP_BOLT = "countIpBolt";
        public static final String COUNT_BROWSER_BOLT = "countBrowserBolt";
        public static final String COUNT_OS_BOLT = "countOsBolt";

        public static final String USER_AGENT_PARSER_BOLT = "userAgentParserBolt";

        public static final String SAVE_BOLT = "saveBolt";

        // Stream ids
        public static final String IP_COUNT_STREAM = "ipCountStream";
        public static final String URL_PARSER_STREAM = "urlParserStream";
        public static final String HTTPREFER_PARSER_STREAM = "httpReferParserStream";
        public static final String USERAGENT_PARSER_STREAM = "userAgentParserStream";
        public static final String BROWSER_COUNT_STREAM = "browserCountStream";
        public static final String OS_COUNT_STREAM = "osCountStream";

        // Tuple field names
        public static final String DAY = "day";
        public static final String HOUR = "hour";

        public static final String MINUTE = "minute";

        public static final String IP = "ip";
        public static final String REQUEST_URL = "requestUrl";
        public static final String HTTP_REFER = "httpRefer";
        public static final String USERAGENT = "userAgent";
        public static final String BROWSER = "browser";
        public static final String OS = "os";

        public static final String SERVERTIME_KPI = "serverTimeAndKpi";
        public static final String KPI_COUNTS = "kpiCounts";

        // KPI type prefixes (first segment of the HBase row key)
        public static final String IP_KPI = "I";
        public static final String URL_KPI = "U";
        public static final String BROWSER_KPI = "B";
        public static final String OS_KPI = "O";

        // HBase names
        public static final String HBASE_TABLENAME = "weblogstatictis";
        public static final String COLUMN_FAMILY = "info";

    }
    
  6. 測試類WebLogStatictis

    package com.ibeifeng.bigdata.storm.weglog;
    
    import backtype.storm.Config;
    import backtype.storm.LocalCluster;
    import backtype.storm.StormSubmitter;
    import backtype.storm.generated.AlreadyAliveException;
    import backtype.storm.generated.InvalidTopologyException;
    import backtype.storm.generated.StormTopology;
    import backtype.storm.spout.SchemeAsMultiScheme;
    import backtype.storm.topology.IRichSpout;
    import backtype.storm.topology.TopologyBuilder;
    import backtype.storm.tuple.Fields;
    
    import storm.kafka.*;
    
    import static com.ibeifeng.bigdata.storm.weglog.WebLogConstants.*;
    import java.util.UUID;
    
    /**
     * Created by ad on 2016/12/17.
     */
    public class WebLogStatictis {
    
        public static void main(String[] args) {
    
            WebLogStatictis webLogStatictis = new WebLogStatictis();
            StormTopology topology = webLogStatictis.buildTopology();
    
            Config conf = new Config();
    
            //conf.setNumAckers(4);
            if(args == null || args.length == 0){
                // 本地執行
                //conf.setMessageTimeoutSecs(1); // tuple發射超時時間
                LocalCluster localCluster = new LocalCluster();
                localCluster.submitTopology("webloganalyse", conf , topology);
            }else{
                // 提交到集羣上執行
                conf.setNumWorkers(4); // 指定使用多少個進程來執行該Topology
                try {
                    StormSubmitter.submitTopology(args[0],conf, topology);
                } catch (AlreadyAliveException e) {
                    e.printStackTrace();
                } catch (InvalidTopologyException e) {
                    e.printStackTrace();
                }
            }
        }
    
        /**
         * 構造一個kafkaspout
         * @return
         */
        private IRichSpout generateSpout(){
            BrokerHosts hosts = new ZkHosts("bigdata01.com:2181");
            String topic = "nginxlog";
            String zkRoot = "/" + topic;
            String id = UUID.randomUUID().toString();
            SpoutConfig spoutConf = new SpoutConfig(hosts,topic,zkRoot,id);
            spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme()); // 按字符串解析
            spoutConf.forceFromStart = true;//從頭開發消費
            KafkaSpout kafkaSpout = new KafkaSpout(spoutConf);
            return kafkaSpout;
        }
    
        private StormTopology buildTopology(){
            // 構造Topology
            TopologyBuilder builder = new TopologyBuilder();
            // 指定Spout
            builder.setSpout(KAFKA_SPOUT_ID, generateSpout());
    
            builder.setBolt(WEB_LOG_PARSER_BOLT,new LogParserBolt())
                .shuffleGrouping(KAFKA_SPOUT_ID);
    
            // 將countIPBolt
            builder.setBolt(COUNT_IP_BOLT, new CountKpiBolt(IP_KPI))
                    .fieldsGrouping(WEB_LOG_PARSER_BOLT, IP_COUNT_STREAM, new Fields(IP));
    
    
            // userAgentParserBolt
            builder.setBolt(USER_AGENT_PARSER_BOLT, new UserAgentParserBolt())
                    .shuffleGrouping(WEB_LOG_PARSER_BOLT,USERAGENT_PARSER_STREAM);
    
            builder.setBolt(COUNT_BROWSER_BOLT,new CountKpiBolt(BROWSER_KPI))
                    .fieldsGrouping(USER_AGENT_PARSER_BOLT,BROWSER_COUNT_STREAM, new Fields(BROWSER));
    
            builder.setBolt(COUNT_OS_BOLT,new CountKpiBolt(OS_KPI))
                    .fieldsGrouping(USER_AGENT_PARSER_BOLT,OS_COUNT_STREAM, new Fields(OS));
    
            builder.setBolt(SAVE_BOLT ,new SaveBolt(),3)
                    .shuffleGrouping(COUNT_IP_BOLT)
                    .shuffleGrouping(COUNT_BROWSER_BOLT)
                    .shuffleGrouping(COUNT_OS_BOLT)
            ;
            return builder.createTopology();
        }
    }
    
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章