flink1.10 sql 例子

ServiceKafka

/*
 * www.unisinsight.com Inc.
 * Copyright (c) 2018 All Rights Reserved
 */
package com.test.flink.service.sql;

import org.apache.flink.api.common.time.Time;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.StreamQueryConfig;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.functions.TemporalTableFunction;

/**
 * description
 *
 * @author yuwei [[email protected]]
 * @date 2020/03/10 10:56
 * @since 1.0 2017-11-26T01:00:00Z
 */
public class ServiceKafka {
    /**
     * Catalogue of Flink 1.10 SQL examples (Blink planner, streaming mode).
     *
     * <p>Registers three Kafka-backed tables and one JDBC-backed table through DDL, then builds a
     * series of example queries: plain GROUP BY, OVER window, TUMBLE / HOP / SESSION group windows,
     * a stream-stream join, and joins against the JDBC table. The lines that print query results
     * are left commented out so that a single example can be enabled at a time.
     *
     * @param args command-line arguments; not used
     * @throws IllegalStateException if submitting or running the job fails
     */
    public static void main(String[] args) {
        StreamExecutionEnvironment bsEnv = StreamExecutionEnvironment.getExecutionEnvironment();
        // Must be set explicitly; otherwise SQL that relies on event time produces no output.
        bsEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        EnvironmentSettings bsSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
        StreamTableEnvironment bsTableEnv = StreamTableEnvironment.create(bsEnv, bsSettings);

        // Kafka source "system": event time on recordTime (5 s watermark delay) plus a proctime column.
        String systemSource = "CREATE TABLE system_table (\n" +
                "    systemId BIGINT,\n" +
                "    systemName VARCHAR,\n" +
                "    recordTime TIMESTAMP(3),\n" +
                "    proctime as PROCTIME(),\n" +
                "    WATERMARK FOR recordTime as recordTime - INTERVAL '5' SECOND " +
                ") WITH (\n" +
                "    'connector.type' = 'kafka',\n" +
                "    'connector.version' = 'universal',\n" +
                "    'connector.topic' = 'system',\n" +
                "    'connector.startup-mode' = 'latest-offset',\n" +
                "    'connector.properties.zookeeper.connect' = 'cdhslave02.unisinsight.com:2181',\n" +
                "    'connector.properties.bootstrap.servers' = 'cdhslave02.unisinsight.com:9092',\n" +
                "    'format.type' = 'json',\n" +
                "    'format.derive-schema' = 'true' " +
                ")";

        // Kafka source "idu": same layout as system_table, linked to it through systemId.
        String iduSource = "CREATE TABLE idu_table (\n" +
                "    iduId BIGINT,\n" +
                "    iduName VARCHAR,\n" +
                "    systemId BIGINT,\n" +
                "    recordTime TIMESTAMP(3),\n" +
                "    proctime as PROCTIME(),\n" +
                "    WATERMARK FOR recordTime as recordTime - INTERVAL '5' SECOND " +
                ") WITH (\n" +
                "    'connector.type' = 'kafka',\n" +
                "    'connector.version' = 'universal',\n" +
                "    'connector.topic' = 'idu',\n" +
                "    'connector.startup-mode' = 'latest-offset',\n" +
                "    'connector.properties.zookeeper.connect' = 'cdhslave02.unisinsight.com:2181',\n" +
                "    'connector.properties.bootstrap.servers' = 'cdhslave02.unisinsight.com:9092',\n" +
                "    'format.type' = 'json',\n" +
                "    'format.derive-schema' = 'true' " +
                ")";

        // Kafka source "module". Unlike the two tables above it does not set 'format.derive-schema'.
        String moduleSource = "CREATE TABLE module_table (\n" +
                "    moduleId BIGINT,\n" +
                "    moduleName VARCHAR,\n" +
                "    systemId BIGINT,\n" +
                "    recordTime TIMESTAMP(3),\n" +
                "    proctime as PROCTIME(),\n" +
                "    WATERMARK FOR recordTime as recordTime - INTERVAL '5' SECOND " +
                ") WITH (\n" +
                "    'connector.type' = 'kafka',\n" +
                "    'connector.version' = 'universal',\n" +
                "    'connector.topic' = 'module',\n" +
                "    'connector.startup-mode' = 'latest-offset',\n" +
                "    'connector.properties.zookeeper.connect' = 'cdhslave02.unisinsight.com:2181',\n" +
                "    'connector.properties.bootstrap.servers' = 'cdhslave02.unisinsight.com:9092',\n" +
                "    'format.type' = 'json'\n" +
                ")";

        // Register the three Kafka tables; every query below depends on them.
        bsTableEnv.sqlUpdate(systemSource);
        bsTableEnv.sqlUpdate(iduSource);
        bsTableEnv.sqlUpdate(moduleSource);


        // Reference-only queries: these three strings are never submitted in this method.
        String systemSql = " select * from system_table limit 10";

        String iduSql = " select *, TO_TIMESTAMP(recordTime)as eventTime  from idu_table limit 10";

        String moduleSql = " select * , TO_TIMESTAMP(recordTime)as eventTime from module_table limit 10";

        // do logic

        // Author's note: tables could not be given aliases, which is a pity.
        // NOTE(review): later queries in this method do use aliases (AS U, as A) - confirm what this referred to.
        //
        // State grows too large on long-running jobs, so an expiry time is put on idle state.
        // According to the author the cleanup is driven by processing time: it is independent of
        // the data and simply happens periodically.
        // The config object is only constructed here; it is not passed to any call in this method.
        StreamQueryConfig streamQueryConfig = new StreamQueryConfig().withIdleStateRetentionTime(Time.hours(10), Time.hours(210));
        // GROUP BY without a window yields a retract stream. If the result is inserted into a sink,
        // state has to be expired periodically (streamQueryConfig), otherwise it grows without bound.
        // count(distinct xxx) needs periodic state cleanup as well.
        //  bsTableEnv.sqlUpdate(systemInsert,streamQueryConfig);
        String systemName_cnt = "select systemName,count(*)cnt from system_table " +
                "GROUP BY systemName";
        Table table1 = bsTableEnv.sqlQuery(systemName_cnt);
//         table1.printSchema();
//         bsTableEnv.toRetractStream(table1, Row.class).print();

        // OVER window (not a fixed window): running row count per systemName, ordered by proctime.
        String tumble_cnt2 = "select count(*) OVER w as cnt,* from system_table" +
                " " +
                "WINDOW w AS (\n" +
                "  PARTITION BY systemName\n" +
                "  ORDER BY proctime\n" +
                "  ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)";
        Table joinTable2 = bsTableEnv.sqlQuery(tumble_cnt2);

        // TUMBLE group window: the result is an append stream; state is cleaned up automatically
        // when the window closes.
        String tumble_cnt = "select systemName,TUMBLE_START(recordTime, INTERVAL '5' SECOND)as window_startTime,count(*)cnt from system_table " +
                "GROUP BY TUMBLE(recordTime, INTERVAL '5' SECOND),systemName";
        Table table2 = bsTableEnv.sqlQuery(tumble_cnt);
//        table2.printSchema();
//         bsTableEnv.toRetractStream(table2, Row.class).print();

        // HOP group window: the result is an append stream; state is cleaned up automatically when
        // the sliding window closes.
        // HOP(time_attr, slide interval, window size)
        String hop_cnt = "select systemName,count(*)cnt,HOP_START(recordTime, INTERVAL '7' SECOND,INTERVAL '10' SECOND)as window_startTime,HOP_END(recordTime, INTERVAL '7' SECOND,INTERVAL '10' SECOND)as window_endTime from system_table " +
                "GROUP BY HOP(recordTime, INTERVAL '7' SECOND,INTERVAL '10' SECOND),systemName";
        Table table3 = bsTableEnv.sqlQuery(hop_cnt);
//        table3.printSchema();
//        bsTableEnv.toRetractStream(table3, Row.class).print();

        // SESSION group window: the result is an append stream; state is cleaned up automatically
        // when the session window closes.
        // SESSION(time_attr, gap interval): if the group receives no data for the gap duration, the
        // window fires and emits its result.
        String session_cnt = "select systemName,count(*)cnt,session_START(recordTime, INTERVAL '10' SECOND)as window_startTime,session_END(recordTime, INTERVAL '10' SECOND)as window_endTime from system_table " +
                "GROUP BY session(recordTime, INTERVAL '10' SECOND),systemName";
        Table table10 = bsTableEnv.sqlQuery(session_cnt);
//        table10.printSchema();
//        bsTableEnv.toRetractStream(table10, Row.class).print();

        // Stream-stream join: the result is an append stream. DATE_FORMAT renders the idu record
        // time as a readable string (author's note: converts UTC time to standard time).
        // Built-in functions: https://help.aliyun.com/knowledge_detail/62769.html?spm=a2c4g.11186631.2.5.4b932834JhFGKV
        String joinSql = " select system_table.*,idu_table.iduId,idu_table.iduName, DATE_FORMAT(idu_table.recordTime,'yyyy-MM-dd HH:mm:ss')as eventTime from system_table " +
                "inner join idu_table  on system_table.systemId=idu_table.systemId and idu_table.recordTime=system_table.recordTime ";

        // Fixed (tumbling) window over the join above: per-minute count for each system / indoor unit pair.
        String tumble_cnt1 = "select systemName,iduName,count(*)cnt,TUMBLE_START(recordTime, INTERVAL '1' MINUTE)as window_startTime,tumble_end(recordTime, INTERVAL '1' MINUTE)as window_endTime from (" + joinSql + ")" +
                "GROUP BY TUMBLE(recordTime, INTERVAL '1' MINUTE),systemName,iduName";
        Table joinTable = bsTableEnv.sqlQuery(tumble_cnt1);
        joinTable.printSchema();

        // Create the MySQL dimension table "student".
        String sqlSource = "CREATE TABLE student (\n" +
                "    id BIGINT," +
                "    name VARCHAR," +
                "    age INT," +
                "    recordtime TIMESTAMP(3)," +
                "    route_address VARCHAR," +
                "    WATERMARK FOR recordtime as recordtime - INTERVAL '5' SECOND " +
                ") WITH (\n" +
                "    'connector.type' = 'jdbc',\n" +
                "    'connector.url' = 'jdbc:mysql://192.168.108.140:3306/yuwei',\n" +
                "    'connector.table' = 'student',\n" +
                "    'connector.driver' = 'com.mysql.jdbc.Driver',\n" +
                "    'connector.username' = 'root',\n" +
                "    'connector.password' = 'passwd',\n" +
                "    'connector.lookup.cache.max-rows' = '5000',\n" +
                "    'connector.lookup.cache.ttl' = '1min'\n" +
                ")";

        bsTableEnv.sqlUpdate(sqlSource);


        // Join against MySQL.
        // Reference: https://msd.misuland.com/pd/3053059875815818742
        // Temporal table concept: https://ci.apache.org/projects/flink/flink-docs-release-1.10/dev/table/sql/queries.html#aggregations
        // cast() can also be used for the type conversion.
        // Regular join: if the dimension table data changes (and
        // connector.lookup.cache.max-rows=-1), the join result here does NOT change.
        // Reference only: this query string is never submitted in this method.
        String joinMysql = "select systemId,systemName,DATE_FORMAT(s_etime,'yyyy-MM-dd HH:mm:ss')AS s_etime,name,route_address,DATE_FORMAT(m_etime,'yyyy-MM-dd HH:mm:ss')AS m_etime from(" +
                "select systemId,systemName,to_timestamp(DATE_FORMAT(recordTime,'yyyy-MM-dd HH:mm:ss'))as s_etime from system_table)" +
                "inner join (select to_timestamp(DATE_FORMAT(recordtime,'yyyy-MM-dd HH:mm:ss'))as m_etime,id,name,route_address from student) " +
                // "FOR SYSTEM_TIME AS OF system_table.proctime " +
                "on systemId=id and s_etime>m_etime + INTERVAL '0' HOUR " +
                " ";

        // Temporal (lookup) dimension join: the student rows joined are a snapshot of the database
        // table at the current moment.
        // If the dimension table data changes (and connector.lookup.cache.max-rows=-1), the join
        // result changes immediately as well.
        // If the dimension table holds several versions of a key, one stream record produces
        // several result rows.
        // FOR SYSTEM_TIME AS OF U.proctime: only processing time (proctime) is supported.
        // Reference only: this query string is never submitted in this method.
        String joinMysql1 = "select U.*,C.* from " +
                " system_table AS U " +
                "left join  student " +
                "FOR SYSTEM_TIME AS OF U.proctime AS C " +
                "on U.systemId=C.id  " +
                " ";

        // Temporal table function join.
        // Set the time attribute (event time: recordtime) and the primary key (id) of the temporal table.
        TemporalTableFunction temporalTableFunction = bsTableEnv.sqlQuery("select * from student")
                .createTemporalTableFunction("recordtime", "id");
        bsTableEnv.registerFunction("studentFunc", temporalTableFunction);

        // Stream joined with the temporal table: when student holds several versions per key, the
        // stream row is joined with the newest version whose time is <= the stream row's event time,
        // so one stream record yields exactly one result row.
        // Author's note: with event time, inserts and updates in the dimension table are NOT picked
        // up automatically by the registered student table; only processing time gets automatic updates.
        // Equivalent join condition: A.systemId=B.id and
        //   select * from student as a where a.recordtime=(select max(recordtime) from student as b
        //   where a.id=b.id and b.recordtime<=system_table.recordTime);
        String sqlTemp = "select A.*,B.* FROM " +
                "system_table as A,LATERAL TABLE(studentFunc(A.recordTime)) as B " +
                "where A.systemId=B.id";

        Table joinMysqltable = bsTableEnv.sqlQuery(sqlTemp);
        //joinMysqltable.printSchema();
        //bsTableEnv.toAppendStream(joinMysqltable, Row.class).print();

        // NOTE(review): with every print line above commented out no sink is attached, so execute()
        // presumably fails until one example is enabled - confirm.
        try {
            bsTableEnv.execute("job");
        } catch (Exception e) {
            // Propagate instead of only printing the stack trace, so a failed job is not reported
            // as a normal return from main.
            throw new IllegalStateException("Flink job 'job' failed to execute", e);
        }
    }
}
時態表關聯
/*
 * www.unisinsight.com Inc.
 * Copyright (c) 2018 All Rights Reserved
 */
package com.test.flink.service.sql.join_mysql;

import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.functions.TemporalTableFunction;
import org.apache.flink.types.Row;

/**
 * description
 * Process time 是指正在執行相應操作的機器的系統時間(也稱爲“掛鐘時間”)。
 * Event time 是指基於附在每行上的時間戳對流數據進行處理。時間戳可以在事件發生時進行編碼。
 * Ingestion time 是事件進入Flink的時間;在內部,它的處理類似於事件時間。
 *
 * @author yuwei [[email protected]]
 * @date 2020/03/16 14:33
 * @since 1.0
 */
public class LATERAL_TABLE_join {
    /**
     * Temporal table function join example: joins the Kafka-backed {@code system_table} with a
     * temporal table function built over the JDBC-backed {@code student} table and prints the
     * joined rows.
     *
     * <p>The temporal table function is versioned on the processing-time column {@code proctime1}
     * and keyed on {@code id}; the join passes the stream's {@code proctime} column to it.
     *
     * @param args command-line arguments; not used
     * @throws IllegalStateException if submitting or running the job fails
     */
    public static void main(String[] args) {
        StreamExecutionEnvironment bsEnv = StreamExecutionEnvironment.getExecutionEnvironment();
        // Must be set explicitly; otherwise SQL that relies on event time produces no output.
        bsEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        EnvironmentSettings bsSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
        StreamTableEnvironment bsTableEnv = StreamTableEnvironment.create(bsEnv, bsSettings);

        // Kafka source "system": event time on recordTime (5 s watermark delay) plus a proctime column.
        String systemSource = "CREATE TABLE system_table (\n" +
                "    systemId BIGINT,\n" +
                "    systemName VARCHAR,\n" +
                "    recordTime TIMESTAMP(3),\n" +
                "    proctime as PROCTIME(),\n" +
                "    WATERMARK FOR recordTime as recordTime - INTERVAL '5' SECOND " +
                ") WITH (\n" +
                "    'connector.type' = 'kafka',\n" +
                "    'connector.version' = 'universal',\n" +
                "    'connector.topic' = 'system',\n" +
                "    'connector.startup-mode' = 'latest-offset',\n" +
                "    'connector.properties.zookeeper.connect' = 'cdhslave02.unisinsight.com:2181',\n" +
                "    'connector.properties.bootstrap.servers' = 'cdhslave02.unisinsight.com:9092',\n" +
                "    'format.type' = 'json',\n" +
                "    'format.derive-schema' = 'true' " +
                ")";
        bsTableEnv.sqlUpdate(systemSource);

        // MySQL dimension table "student", extended with the processing-time column proctime1.
        String sqlSource = "CREATE TABLE student (\n" +
                "    id BIGINT," +
                "    name VARCHAR," +
                "    age INT," +
                "    recordtime TIMESTAMP(3)," +
                "    route_address VARCHAR," +
                "    proctime1 as PROCTIME(),\n" +
                "    WATERMARK FOR recordtime as recordtime - INTERVAL '5' SECOND " +
                ") WITH (\n" +
                "    'connector.type' = 'jdbc',\n" +
                "    'connector.url' = 'jdbc:mysql://192.168.108.140:3306/yuwei',\n" +
                "    'connector.table' = 'student',\n" +
                "    'connector.driver' = 'com.mysql.jdbc.Driver',\n" +
                "    'connector.username' = 'root',\n" +
                "    'connector.password' = 'passwd',\n" +
                "    'connector.lookup.cache.max-rows' = '100',\n" +
                "    'connector.lookup.cache.ttl' = '600second'\n" +
                ")";

        bsTableEnv.sqlUpdate(sqlSource);

        // Temporal table function join: set the time attribute and primary key of the temporal table.
        // Alternative kept for reference (build the table from a DataStream instead of SQL):
//        DataStream<student> dataStream = bsTableEnv.toAppendStream(bsTableEnv.sqlQuery("select id,name,age,recordtime,route_address from student"),Row.class)
//        .map(row -> new student());
//        Table table = bsTableEnv.fromDataStream(dataStream, "id,name,age,recordtime,route_address,my_proctime.proctime");

        Table table = bsTableEnv.sqlQuery("select id,name,age,recordtime,proctime1 from student");
        table.printSchema();
        // Time attribute: proctime1 (processing time); primary key: id.
        TemporalTableFunction temporalTableFunction = table.createTemporalTableFunction("proctime1", "id");

        bsTableEnv.registerFunction("studentFunc", temporalTableFunction);

        // Stream joined with the temporal table function. Author's notes on temporal joins:
        // - when student holds several versions per key, the stream row is joined with the newest
        //   version that is not later than the time passed to studentFunc, so one stream record
        //   yields exactly one result row;
        // - with event time, inserts and updates in the dimension table are not picked up
        //   automatically by the registered student table.
        // This query passes the stream's processing-time column (proctime) to studentFunc.
        String sqlTemp = "select A.*,B.* FROM " +
                "system_table as A,LATERAL TABLE(studentFunc(proctime)) as B " +
                "where A.systemId=B.id";

        Table joinMysqltable = bsTableEnv.sqlQuery(sqlTemp);
        joinMysqltable.printSchema();
        bsTableEnv.toAppendStream(joinMysqltable, Row.class).print();

        try {
            bsTableEnv.execute("job");
        } catch (Exception e) {
            // Propagate instead of only printing the stack trace, so a failed job is not reported
            // as a normal return from main.
            throw new IllegalStateException("Flink job 'job' failed to execute", e);
        }

    }
}
臨時維表關聯
/*
 * www.unisinsight.com Inc.
 * Copyright (c) 2018 All Rights Reserved
 */
package com.test.flink.service.sql.join_mysql;

import com.test.flink.udx.CompareCol;
import com.test.flink.udx.SplitFun;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

/**
 * description
 *
 * @author yuwei [[email protected]]
 * @date 2020/03/16 14:27
 * @since 1.0
 */
public class SYSTEM_TIME_join {
    /**
     * Lookup ("FOR SYSTEM_TIME AS OF") join example: enriches the Kafka-backed
     * {@code system_table} with the JDBC-backed {@code student} table, then filters the result
     * using per-job parameters looked up from {@code config_tb}, and prints the retract stream.
     *
     * @param args command-line arguments; not used
     * @throws IllegalStateException if submitting or running the job fails
     */
    public static void main(String[] args) {
        StreamExecutionEnvironment bsEnv = StreamExecutionEnvironment.getExecutionEnvironment();
        // Must be set explicitly; otherwise SQL that relies on event time produces no output.
        bsEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        System.out.println(SYSTEM_TIME_join.class.getName());
        EnvironmentSettings bsSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
        StreamTableEnvironment bsTableEnv = StreamTableEnvironment.create(bsEnv, bsSettings);

        // Kafka source "system": event time on recordTime (5 s watermark delay) plus a proctime column.
        String systemSource = "CREATE TABLE system_table (\n" +
                "    systemId BIGINT,\n" +
                "    systemName VARCHAR,\n" +
                "    recordTime TIMESTAMP(3),\n" +
                "    proctime as PROCTIME(),\n" +
                "    WATERMARK FOR recordTime as recordTime - INTERVAL '5' SECOND " +
                ") WITH (\n" +
                "    'connector.type' = 'kafka',\n" +
                "    'connector.version' = 'universal',\n" +
                "    'connector.topic' = 'system',\n" +
                "    'connector.startup-mode' = 'latest-offset',\n" +
                "    'connector.properties.zookeeper.connect' = 'cdhslave02.unisinsight.com:2181',\n" +
                "    'connector.properties.bootstrap.servers' = 'cdhslave02.unisinsight.com:9092',\n" +
                "    'format.type' = 'json',\n" +
                "    'format.derive-schema' = 'true' " +
                ")";
        bsTableEnv.sqlUpdate(systemSource);

        // MySQL dimension table "student"; lookup cache max-rows is set to -1.
        String sqlSource = "CREATE TABLE student (\n" +
                "    id BIGINT," +
                "    name VARCHAR," +
                "    age INT," +
                "    recordtime TIMESTAMP(3)," +
                "    route_address VARCHAR" +
                ") WITH (\n" +
                "    'connector.type' = 'jdbc',\n" +
                "    'connector.url' = 'jdbc:mysql://192.168.108.140:3306/yuwei',\n" +
                "    'connector.table' = 'student',\n" +
                "    'connector.driver' = 'com.mysql.jdbc.Driver',\n" +
                "    'connector.username' = 'root',\n" +
                "    'connector.password' = 'passwd',\n" +
                "    'connector.lookup.cache.max-rows' = '-1',\n" +
                "    'connector.lookup.cache.ttl' = '600second'\n" +
                ")";

        bsTableEnv.sqlUpdate(sqlSource);

        // Temporal (lookup) dimension join: the student rows joined are a snapshot of the database
        // table at the current moment.
        // If the dimension table data changes (and connector.lookup.cache.max-rows=-1), the join
        // result changes immediately as well.
        // If the dimension table holds several versions of a key, one stream record produces
        // several result rows.
        // FOR SYSTEM_TIME AS OF U.proctime: only processing time (proctime) is supported.
        // Outside of windows, each incoming record triggers one computation; only equi-joins are supported.
        // Reference only: joinMysql1 and joinMysql2 are not submitted unless the block below is enabled.
        String joinMysql1 = "select U.*,C.* from " +
                " system_table AS U " +
                "left join  student " +
                "FOR SYSTEM_TIME AS OF U.proctime AS C " +
                "on U.systemId=C.id " +
                " ";
        // Same join, keeping only the row with the latest recordtime per id via row_number().
        String joinMysql2 = "select * from (select *,row_number() over(partition by id order by recordtime desc)rn" +
                " from (select U.*,C.* from " +
                " system_table AS U " +
                "left join  student " +
                "FOR SYSTEM_TIME AS OF U.proctime AS C " +
                "on U.systemId=C.id  " +
                "))where rn=1 ";

//
//        Table joinMysqltable = bsTableEnv.sqlQuery(joinMysql2);
//        joinMysqltable.printSchema();
//        bsTableEnv.toRetractStream(joinMysqltable, Row.class).print();

        // Dynamic parameter configuration table (one column per parameter).
        String config = "CREATE TABLE config (\n" +
                "    systemId BIGINT," +
                "    route_address VARCHAR," +
                "    update_time TIMESTAMP(3)," +
                "    jobId VARCHAR" +
                ") WITH (\n" +
                "    'connector.type' = 'jdbc',\n" +
                "    'connector.url' = 'jdbc:mysql://192.168.108.140:3306/yuwei',\n" +
                "    'connector.table' = 'config1',\n" +
                "    'connector.driver' = 'com.mysql.jdbc.Driver',\n" +
                "    'connector.username' = 'root',\n" +
                "    'connector.password' = 'passwd',\n" +
                "    'connector.lookup.cache.max-rows' = '-1',\n" +
                "    'connector.lookup.cache.ttl' = '600second'\n" +
                ")";
        bsTableEnv.sqlUpdate(config);
        // Approach 1: join directly against a table shaped like "config"; changing a parameter value
        // there directly affects the result. Reference only: this query string is never submitted.
        String joinMysql3 = "select A.* from (select system_table.*,'123' as jobId,student.* from " +
                " system_table  " +
                "left join  student " +
                "FOR SYSTEM_TIME AS OF system_table.proctime  " +
                "on system_table.systemId=student.id) as A " +
                "left join config " +
                " FOR SYSTEM_TIME AS OF A.proctime " +
                "on A.jobId=config.jobId " +
                " where A.systemId>=config.systemId and A.route_address like config.route_address ";

        // Configuration table that stores all parameters of a job in the single config_info column.
        String config_tb = "CREATE TABLE config_tb (\n" +
                "    jobId VARCHAR," +
                "    job_main_class_name VARCHAR," +
                "    update_time TIMESTAMP(3)," +
                "    config_info VARCHAR" +
                ") WITH (\n" +
                "    'connector.type' = 'jdbc',\n" +
                "    'connector.url' = 'jdbc:mysql://192.168.108.140:3306/yuwei',\n" +
                "    'connector.table' = 'config_tb',\n" +
                "    'connector.driver' = 'com.mysql.jdbc.Driver',\n" +
                "    'connector.username' = 'root',\n" +
                "    'connector.password' = 'passwd',\n" +
                "    'connector.lookup.cache.max-rows' = '-1',\n" +
                "    'connector.lookup.cache.ttl' = '600second'\n" +
                ")";
        bsTableEnv.sqlUpdate(config_tb);

        // Register the UDTF that splits config_info into (systemId, route_address).
        bsTableEnv.registerFunction("splitFun", new SplitFun());
        //bsTableEnv.sqlUpdate("CREATE TEMPORARY FUNCTION IF NOT EXISTS splitFun AS 'com.test.flink.udx.SplitFun'  LANGUAGE JAVA");
        // Reference only: this query string is never submitted.
        String realConfigSql = "select A.jobId,A.job_main_class_name,A.update_time,T.systemId,T.route_address" +
                " from config_tb as A," +
                " lateral table(splitFun(A.config_info)) as T(systemId,route_address)";

        // The job's own class name is used as the key into config_tb.job_main_class_name.
        String jobMainClassName = SYSTEM_TIME_join.class.getName();

        // Approach 2: join against config_tb as a lookup table; changing a parameter value takes
        // effect immediately. A scalar UDF (compareCol) handles the more complex filter condition.
        bsTableEnv.registerFunction("compareCol",new CompareCol());
        String joinMysql4 = "select *from (select A.*,REPLACE(SPLIT_INDEX(SPLIT_INDEX(config_info,',',0),':',1),'\"','') as config_systemId," +
                " REPLACE(REPLACE(SPLIT_INDEX(SPLIT_INDEX(config_tb.config_info,',',1),':',1),'\"',''),'}','') as config_route_address from (select system_table.*,'" + jobMainClassName + "' as jobMainClassName,student.* from " +
                " system_table  " +
                "left join  student " +
                "FOR SYSTEM_TIME AS OF system_table.proctime  " +
                "on system_table.systemId=student.id) as A " +
                "left join config_tb " +
                "FOR SYSTEM_TIME AS OF A.proctime " +
                "on A.jobMainClassName=config_tb.job_main_class_name) " +
                "where systemId>=config_systemId and compareCol(route_address,config_route_address)>0";
        //  "and A.systemId>= 2 ";
        // "and A.route_address like '%light-rail station%' ";
        Table joinMysqltable = bsTableEnv.sqlQuery(joinMysql4);
        joinMysqltable.printSchema();
        // explain() only returns the plan as a String; print it so it is actually visible.
        System.out.println(bsTableEnv.explain(joinMysqltable));
        bsTableEnv.toRetractStream(joinMysqltable, Row.class).print();

        try {
            JobExecutionResult job = bsTableEnv.execute(SYSTEM_TIME_join.class.getCanonicalName());
            System.out.println("jobId:" + job.getJobID());
        } catch (Exception e) {
            // Propagate instead of only printing the stack trace, so a failed job is not reported
            // as a normal return from main.
            throw new IllegalStateException(
                    "Flink job '" + SYSTEM_TIME_join.class.getCanonicalName() + "' failed to execute", e);
        }
    }
}
事件時間排序

/*
 * www.unisinsight.com Inc.
 * Copyright (c) 2018 All Rights Reserved
 */
package com.test.flink;

/**
 * description
 *
 * @author yuwei [[email protected]]
 * @date 2019/12/19 14:11
 * @since 1.0
 */
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

import java.time.Instant;
import java.util.Random;

/**
 * Desc: 使用 Table / SQL API 根據事件時間和水印對無序流進行排序
 * Created by zhisheng on 2019-06-14
 * blog:http://www.54tianzhisheng.cn/
 * 微信公衆號:zhisheng
 */
public class Sort {

    /** Upper bound (exclusive), in milliseconds, of the random offset added to each event time. */
    public static final int OUT_OF_ORDERNESS = 1000;

    /**
     * Builds a stream of out-of-order events, registers it as table {@code zhisheng} with
     * {@code eventTime} as the rowtime attribute, orders it by event time through SQL and prints
     * the ordered rows.
     *
     * @param args command-line arguments; not used
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment bsTableEnv = StreamTableEnvironment.create(
                env,
                EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build());
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.setParallelism(1);

        // Generated events, with timestamps and watermarks assigned from Event.eventTime.
        DataStream<Event> events = env
                .addSource(new OutOfOrderEventSource())
                .assignTimestampsAndWatermarks(new TimestampsAndWatermarks());

        bsTableEnv.registerTable("zhisheng", bsTableEnv.fromDataStream(events, "eventTime.rowtime"));

        Table ordered = bsTableEnv.sqlQuery("select eventTime from zhisheng order by eventTime");
        bsTableEnv.toAppendStream(ordered, Row.class).print();

        // Print the execution plan before launching the job.
        System.out.println(env.getExecutionPlan());

        env.execute("sort-streaming-data");
    }

    /** Event carrying only an event-time timestamp in epoch milliseconds. */
    public static class Event {

        Long eventTime;

        /** Stamps the event with "now" plus a random offset below {@link #OUT_OF_ORDERNESS} ms. */
        Event() {
            long nowMillis = Instant.now().toEpochMilli();
            int jitterMillis = new Random().nextInt(OUT_OF_ORDERNESS);
            this.eventTime = nowMillis + jitterMillis;
        }

        @Override
        public String toString() {
            return "Event{eventTime=" + eventTime + '}';
        }
    }


    /**
     * Source that keeps emitting new events, pausing 1 ms between them, until cancelled.
     */
    private static class OutOfOrderEventSource extends RichSourceFunction<Event> {

        // Cleared by cancel(); volatile so the emitting loop sees the change.
        private volatile boolean active = true;

        @Override
        public void run(SourceContext<Event> ctx) throws Exception {
            while (active) {
                Event next = new Event();
                ctx.collect(next);
                Thread.sleep(1);
            }
        }

        @Override
        public void cancel() {
            active = false;
        }
    }

    /**
     * Timestamp / watermark assigner that allows {@link #OUT_OF_ORDERNESS} ms of out-of-orderness
     * and reads the timestamp from {@code Event.eventTime}.
     */
    private static class TimestampsAndWatermarks extends BoundedOutOfOrdernessTimestampExtractor<Event> {

        public TimestampsAndWatermarks() {
            super(Time.milliseconds(OUT_OF_ORDERNESS));
        }

        @Override
        public long extractTimestamp(Event element) {
            return element.eventTime;
        }
    }
}

工具類
SplitFun
/*
 * www.unisinsight.com Inc.
 * Copyright (c) 2018 All Rights Reserved
 */
package com.test.flink.udx;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.table.functions.TableFunction;

/**
 * description
 *
 * @author yuwei [[email protected]]
 * @date 2020/03/19 14:04
 * @since 1.0
 */
public class SplitFun extends TableFunction<Tuple2<Long, String>> {

    /**
     * Splits a flat, JSON-like config string into a {@code (systemId, route_address)} pair and
     * emits it as a single row.
     *
     * <p>Expected shape: {@code {"systemId": "2", "route_address": "%pattern%"}}. Braces, double
     * quotes and spaces are stripped, the remainder is split on {@code ','} into entries, and each
     * entry is split on its first {@code ':'} into key and value. The first entry's value is
     * parsed as the numeric id and the second entry's value is returned as the address pattern.
     *
     * @param str the config string; a {@code null} value emits no row
     * @throws IllegalArgumentException if the string does not contain two {@code key:value}
     *         entries or the first value is not a valid long
     */
    public void eval(String str) {
        // A NULL column value carries no configuration: emit nothing instead of failing with an NPE.
        if (str == null) {
            return;
        }

        // Literal (non-regex) replacement of the JSON punctuation and of all spaces.
        String flattened = str.replace("{", "")
                .replace("}", "")
                .replace("\"", "")
                .replace(" ", "");
        String[] entries = flattened.split(",");
        if (entries.length < 2) {
            throw new IllegalArgumentException("Expected two key:value entries in config: " + str);
        }

        // Limit 2 keeps any ':' that is part of the value instead of truncating the value there.
        String[] idEntry = entries[0].split(":", 2);
        String[] addressEntry = entries[1].split(":", 2);
        if (idEntry.length < 2 || addressEntry.length < 2) {
            throw new IllegalArgumentException("Malformed key:value entry in config: " + str);
        }

        // NumberFormatException is an IllegalArgumentException, so a bad id surfaces the same way.
        long systemId = Long.parseLong(idEntry[1]);
        collect(Tuple2.of(systemId, addressEntry[1]));
    }
}
CompareCol
/*
 * www.unisinsight.com Inc.
 * Copyright (c) 2018 All Rights Reserved
 */
package com.test.flink.udx;

import cn.hutool.core.util.ObjectUtil;
import org.apache.flink.table.functions.ScalarFunction;

/**
 * description
 *
 * @author yuwei [[email protected]]
 * @date 2020/03/19 18:28
 * @since 1.0
 */
public class CompareCol extends ScalarFunction {

    /**
     * Tests whether one string contains another, ignoring leading and trailing whitespace on both.
     *
     * @param b the string to search in; if {@code ObjectUtil.isEmpty(b)} reports it as empty the
     *          result is {@code 0}
     * @param c the string to look for; {@code null} yields {@code 0}
     * @return {@code 1} if trimmed {@code b} contains trimmed {@code c}, otherwise {@code 0}
     */
    public Integer eval(String b, String c) {
        // Either argument can be NULL when it comes from the nullable side of a LEFT JOIN;
        // treat that as "no match" instead of failing with a NullPointerException on c.trim().
        if (ObjectUtil.isEmpty(b) || c == null) {
            return 0;
        }
        return b.trim().contains(c.trim()) ? 1 : 0;
    }
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章