Flink統計Kafka中每小時的數據量並輸出到MySQL

基於Flink1.9,統計某個日誌中每小時的數據量,並輸出到MySQL。

主函數

public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); // window on event time, not processing time
        env.enableCheckpointing(500); // checkpoint every 500 ms
        Properties properties = Property.getKafkaProperties(Constants.GET_USER_AUTH_NAME);
        FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<String>(Constants.TOPIC_USER_AUTH, new SimpleStringSchema(), properties);
        consumer.setStartFromGroupOffsets();
        DataStream<String> edits = env.addSource(consumer).name("user_auth_edit");
        DataStream<UserAuth> calStream = edits
                // Drop records with abnormal timestamps so they cannot stall the event-time windows.
                .filter(new FilterFunction<String>() {
                    // Compiled once per filter instance instead of once per record
                    // (the original called Pattern.compile inside filter()).
                    private final Pattern tsPattern = Pattern.compile("\\d{10}");

                    @Override
                    public boolean filter(String s) throws Exception {
                        try {
                            JSONObject jsonObject = JSONObject.parseObject(s);
                            String ts = jsonObject.getString("ts");
                            // "ts" must be a 10-digit epoch-seconds string; also guards against a missing field.
                            if (ts == null || !tsPattern.matcher(ts).matches()) {
                                return false;
                            }
                            // Reject "future" records so bogus timestamps cannot advance the watermark;
                            // tolerate 30 minutes of clock skew between producers and this server.
                            int now = (int) (System.currentTimeMillis() / 1000) + 30 * 60;
                            int time = Integer.parseInt(ts);
                            return now >= time;
                        } catch (Exception e) {
                            System.out.println("Filter failed");
                            System.err.println(e + " " + e.getMessage());
                            return false; // best-effort: a malformed record is dropped, not fatal
                        }
                    }
                })
                .assignTimestampsAndWatermarks(new TaskTimestamp(Time.hours(0)))
                .windowAll(TumblingEventTimeWindows.of(Time.hours(1))) // one global count per hour
                .allowedLateness(Time.hours(2)) // late records within 2h re-fire the window
                .aggregate(new UserAuthCount());
        calStream.addSink(new SinkToMySQL());
        env.execute("executed user auth"); // blocks until the streaming job terminates
        System.out.println("executed");
    }

UserAuthCount.java

/**
 * Aggregates one tumbling hour window of raw JSON log lines into a single
 * {@code UserAuth} carrying the hour label ("yyyyMMddHH") and the record count.
 */
public class UserAuthCount implements AggregateFunction<String, UserAuth, UserAuth> {

    /** Fresh accumulator: zero records seen, hour label not yet set. */
    @Override
    public UserAuth createAccumulator()
    {
        return new UserAuth(0L);
    }

    /**
     * Folds one record into the accumulator. The hour label is derived lazily
     * from the first record's "ts" (epoch seconds), so JSON parsing happens
     * only once per window instead of once per record.
     */
    @Override
    public UserAuth add(String s, UserAuth userAuth) {
        if(userAuth.getNumbers()==0L){
            JSONObject jsonObject = JSONObject.parseObject(s);
            SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHH");
            String time_hour = sdf.format(new Date(Long.valueOf(jsonObject.getString("ts")) * 1000L));
            userAuth.setTime_hour(time_hour);
            userAuth.setNumbers(1L);
        }
        else{
            userAuth.setNumbers(userAuth.getNumbers()+1L);
        }
        return userAuth;
    }

    @Override
    public UserAuth getResult(UserAuth userAuth)
    {
        System.out.println("result is "+userAuth);
        return userAuth;
    }

    /**
     * Combines two partial accumulators. The original returned {@code null},
     * which would throw NPE if Flink ever invoked merge (e.g. with merging
     * windows); instead, sum the counts and keep the first non-empty hour label.
     */
    @Override
    public UserAuth merge(UserAuth userAuth, UserAuth acc1) {
        if (userAuth.getNumbers() == 0L) {
            // this side has seen no records yet, so take the hour label from the other side
            userAuth.setTime_hour(acc1.getTime_hour());
        }
        userAuth.setNumbers(userAuth.getNumbers() + acc1.getNumbers());
        return userAuth;
    }
}

解讀:Window窗口AggregateFunction 實現消息事件的次數和累計值 

 @param <IN>  The type of the values that are aggregated (input values) 輸入流的數據類型,本例中爲從Kafka消費出來的String日誌
 @param <ACC> The type of the accumulator (intermediate aggregate state) 累加器(中間聚合狀態)的類型,本例中爲UserAuth,包含time_hour(小時標籤)與numbers(計數)
 @param <OUT> The type of the aggregated result 聚合結果的類型,本例中同樣爲UserAuth

public interface AggregateFunction<IN, ACC, OUT> extends Function, Serializable

 SinkToMySQL.java

public class SinkToMySQL extends RichSinkFunction<UserAuth> implements SinkFunction<UserAuth> {
    private static PreparedStatement ps;
    private static PreparedStatement selectps;
    private static PreparedStatement updateps;
    private static Connection connection = null;

    /**
     * Establishes the JDBC connection and prepares all statements once, so
     * invoke() does not have to open/close a connection per record.
     * Fails fast if setup does not succeed: swallowing the exception here
     * (as the original did) would leave every PreparedStatement null and
     * cause a NullPointerException on each later invoke() call.
     *
     * @param parameters Flink runtime configuration
     * @throws Exception if the connection or any statement cannot be created
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        try {
            connection = getConnection();
            String selectSql = "select id,time_hour, numbers from auth_time where time_hour = ?;";
            selectps = connection.prepareStatement(selectSql);
            String sql = "insert into auth_time(time_hour, numbers,create_time,update_time) values(?, ?,?,?);";
            ps = connection.prepareStatement(sql);
            String updateSql = "update auth_time set numbers = ?,update_time = ? where time_hour = ?;";
            updateps = connection.prepareStatement(updateSql);
        } catch (Exception e) {
            System.out.println("sinkToMysql open is error " + e.getMessage());
            // rethrow so Flink restarts the task instead of running with null statements
            throw e;
        }
    }
    /**
     * Loads the configured JDBC driver and opens a connection to MySQL.
     * Throws on failure instead of returning the stale/null static connection
     * (the original deferred the error to the first statement use).
     *
     * @return an open JDBC connection
     */
    private static Connection getConnection() {
        try {
            // load the JDBC driver class named in the properties file
            Class.forName(Property.getStrValue("mysql.classname"));
            String url = Property.getMysqlUrl();
            connection = DriverManager.getConnection(url);
            System.out.println("數據庫連接建立成功");
        } catch (Exception e) {
            System.out.println("-----------mysql get connection has exception , msg = " + e.getMessage());
            // preserve the cause so the real failure (driver missing, auth, network) is visible
            throw new RuntimeException("failed to open MySQL connection", e);
        }
        return connection;
    }

    /**
     * Releases the prepared statements and the JDBC connection when the sink
     * shuts down. Each resource is closed independently: in the original, a
     * failure while closing one statement skipped (and leaked) all remaining
     * resources because everything shared a single try/catch.
     *
     * @throws Exception only from super.close()
     */
    @Override
    public void close() throws Exception {
        super.close();
        closeQuietly(ps, "關閉插入ps");
        closeQuietly(selectps, "關閉查詢ps");
        closeQuietly(updateps, "關閉更新ps");
        try {
            if (connection != null && !connection.isClosed()) {
                System.out.println("關閉數據庫連接");
                connection.close();
            }
        } catch (Exception e) {
            System.out.println("close mysql is error " + e.getMessage());
        }
    }

    /** Closes one JDBC resource, logging (never propagating) any failure. */
    private static void closeQuietly(AutoCloseable resource, String message) {
        if (resource == null) {
            return;
        }
        try {
            System.out.println(message);
            resource.close();
        } catch (Exception e) {
            System.out.println("close mysql is error " + e.getMessage());
        }
    }

    /**
     * Writes one window result to MySQL: inserts a new row for the hour, or
     * updates the existing row when a larger count arrives (late data within
     * allowedLateness re-fires the window with a bigger aggregate).
     *
     * Changes from the original:
     * - removed the dead "reconnect" probe that caught
     *   MySQLNonTransientConnectionException, printed a message and did NOT
     *   actually reconnect (plus its duplicated selectps.setString call);
     * - rethrows failures instead of swallowing them, so Flink can fail and
     *   restart the task from a checkpoint rather than silently drop results.
     *
     * @param userAuth aggregated count for one hour window
     * @param context  sink context (unused)
     * @throws SQLException when the lookup, insert or update fails
     */
    @Override
    public void invoke(UserAuth userAuth, Context context) throws SQLException {
        ResultSet resultSet = null;
        try {
            selectps.setString(1, userAuth.getTime_hour());
            resultSet = selectps.executeQuery();

            if (!resultSet.next()) {
                // no row for this hour yet -> insert
                int nowSeconds = (int) (System.currentTimeMillis() / 1000);
                ps.setString(1, userAuth.getTime_hour());
                ps.setLong(2, userAuth.getNumbers());
                ps.setInt(3, nowSeconds);
                ps.setInt(4, nowSeconds);
                System.out.println(ps);
                ps.execute();
            } else {
                System.out.println("已有該時段數據");
                long resultAuthNum = resultSet.getLong("numbers");
                // only ever grow the stored count; a smaller re-fire never overwrites
                if (resultAuthNum < userAuth.getNumbers()) {
                    System.out.println("Add number");
                    updateps.setLong(1, userAuth.getNumbers());
                    updateps.setInt(2, (int) (System.currentTimeMillis() / 1000));
                    updateps.setString(3, userAuth.getTime_hour());
                    System.out.println(updateps);
                    updateps.execute();
                }
            }
        } catch (SQLException e) {
            System.err.println("invoke is error " + e.getMessage());
            throw e; // let Flink handle the failure instead of losing the window result
        } finally {
            if (resultSet != null){
                resultSet.close();
            }
        }
    }

解讀:此處爲自定義sinkFunction,繼承AbstractRichFunction,是一個抽象類,實現了RichFunction接口。

1、open方法,進行初始化;2、invoke方法,進行record輸出

測試:

模擬向kafka中生產13:00到21:00的數據,數據結構如下:

{
	"subject_id": "test",
	"subject_name": "test",
	"resource_id": "test",
	"client_ip": "234.215.14.137",
	"timestamp": "2020-03-17T16:02:32+0800",
	"ts": "1584432152"
}

結果:

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章