ServiceKafka
/*
* www.unisinsight.com Inc.
* Copyright (c) 2018 All Rights Reserved
*/
package com.test.flink.service.sql;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.StreamQueryConfig;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.functions.TemporalTableFunction;
/**
 * Demo job that registers three Kafka-backed tables (system_table, idu_table, module_table)
 * and one JDBC-backed table (student) through DDL, then builds a series of example SQL
 * queries against them: a plain GROUP BY, OVER / TUMBLE / HOP / SESSION windows, a
 * stream-to-stream join, lookup joins and a temporal table function join.
 *
 * <p>No sink is attached to any of the resulting tables in this class; several query strings
 * are only constructed and never passed to {@code sqlQuery}. The only visible output is the
 * schema printed for the windowed join table.
 *
 * @author yuwei [[email protected]]
 * @date 2020/03/10 10:56
 * @since 1.0 2017-11-26T01:00:00Z
 */
public class ServiceKafka {
/**
 * Builds the table environment, registers the source tables and constructs the example queries.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
StreamExecutionEnvironment bsEnv = StreamExecutionEnvironment.getExecutionEnvironment();
// Author's note: this must be set, otherwise SQL that relies on event time produces no output.
bsEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
EnvironmentSettings bsSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
StreamTableEnvironment bsTableEnv = StreamTableEnvironment.create(bsEnv, bsSettings);
// DDL for the Kafka topic 'system': declares a processing-time column (proctime) and an
// event-time watermark on recordTime that lags 5 seconds.
String systemSource = "CREATE TABLE system_table (\n" +
" systemId BIGINT,\n" +
" systemName VARCHAR,\n" +
" recordTime TIMESTAMP(3),\n" +
" proctime as PROCTIME(),\n" +
" WATERMARK FOR recordTime as recordTime - INTERVAL '5' SECOND " +
") WITH (\n" +
" 'connector.type' = 'kafka',\n" +
" 'connector.version' = 'universal',\n" +
" 'connector.topic' = 'system',\n" +
" 'connector.startup-mode' = 'latest-offset',\n" +
" 'connector.properties.zookeeper.connect' = 'cdhslave02.unisinsight.com:2181',\n" +
" 'connector.properties.bootstrap.servers' = 'cdhslave02.unisinsight.com:9092',\n" +
" 'format.type' = 'json',\n" +
" 'format.derive-schema' = 'true' " +
")";
// DDL for the Kafka topic 'idu'; same time attributes as system_table.
String iduSource = "CREATE TABLE idu_table (\n" +
" iduId BIGINT,\n" +
" iduName VARCHAR,\n" +
" systemId BIGINT,\n" +
" recordTime TIMESTAMP(3),\n" +
" proctime as PROCTIME(),\n" +
" WATERMARK FOR recordTime as recordTime - INTERVAL '5' SECOND " +
") WITH (\n" +
" 'connector.type' = 'kafka',\n" +
" 'connector.version' = 'universal',\n" +
" 'connector.topic' = 'idu',\n" +
" 'connector.startup-mode' = 'latest-offset',\n" +
" 'connector.properties.zookeeper.connect' = 'cdhslave02.unisinsight.com:2181',\n" +
" 'connector.properties.bootstrap.servers' = 'cdhslave02.unisinsight.com:9092',\n" +
" 'format.type' = 'json',\n" +
" 'format.derive-schema' = 'true' " +
")";
// DDL for the Kafka topic 'module'. Unlike the two tables above, this one does not set
// 'format.derive-schema'.
String moduleSource = "CREATE TABLE module_table (\n" +
" moduleId BIGINT,\n" +
" moduleName VARCHAR,\n" +
" systemId BIGINT,\n" +
" recordTime TIMESTAMP(3),\n" +
" proctime as PROCTIME(),\n" +
" WATERMARK FOR recordTime as recordTime - INTERVAL '5' SECOND " +
") WITH (\n" +
" 'connector.type' = 'kafka',\n" +
" 'connector.version' = 'universal',\n" +
" 'connector.topic' = 'module',\n" +
" 'connector.startup-mode' = 'latest-offset',\n" +
" 'connector.properties.zookeeper.connect' = 'cdhslave02.unisinsight.com:2181',\n" +
" 'connector.properties.bootstrap.servers' = 'cdhslave02.unisinsight.com:9092',\n" +
" 'format.type' = 'json'\n" +
")";
// One problem is that state grows too large over time, so the state is given an expiry time.
// Note that the cleanup is driven by processing time, i.e. it is independent of the data and
// simply runs periodically.
bsTableEnv.sqlUpdate(systemSource);
bsTableEnv.sqlUpdate(iduSource);
bsTableEnv.sqlUpdate(moduleSource);
// NOTE(review): the three query strings below are never passed to sqlQuery in this method.
String systemSql = " select * from system_table limit 10";
String iduSql = " select *, TO_TIMESTAMP(recordTime)as eventTime from idu_table limit 10";
String moduleSql = " select * , TO_TIMESTAMP(recordTime)as eventTime from module_table limit 10";
// String joinSql = " select system_table.*,idu_table.iduId,idu_table.iduName, TO_TIMESTAMP(idu_table.recordTime)as eventTime from system_table " +
// "inner join idu_table on system_table.systemId=idu_table.systemId and idu_table.recordTime=system_table.recordTime ";
// do logic
// Tables cannot be given aliases, which is a bit of a pity.
// NOTE(review): streamQueryConfig is only referenced by the commented-out sqlUpdate call below.
StreamQueryConfig streamQueryConfig = new StreamQueryConfig().withIdleStateRetentionTime(Time.hours(10), Time.hours(210));
// Non-windowed GROUP BY: the result is a retract stream. If it has to be inserted somewhere,
// state must be cleaned up periodically (streamQueryConfig), otherwise it grows without bound.
// count(distinct xxx) also needs periodic state cleanup.
// bsTableEnv.sqlUpdate(systemInsert,streamQueryConfig);
String systemName_cnt = "select systemName,count(*)cnt from system_table " +
"GROUP BY systemName";
Table table1 = bsTableEnv.sqlQuery(systemName_cnt);
// table1.printSchema();
// bsTableEnv.toRetractStream(table1, Row.class).print();
// Non-fixed (OVER) window. Author's description: number of faults reported per minute by each
// indoor unit under each system. The query itself is a running count per systemName ordered by
// proctime over all preceding rows.
String tumble_cnt2 = "select count(*) OVER w as cnt,* from system_table" +
" " +
"WINDOW w AS (\n" +
" PARTITION BY systemName\n" +
" ORDER BY proctime\n" +
" ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)";
Table joinTable2 = bsTableEnv.sqlQuery(tumble_cnt2);
// TUMBLE window GROUP BY: the result is an append stream; state is cleaned up automatically
// when the window ends.
String tumble_cnt = "select systemName,TUMBLE_START(recordTime, INTERVAL '5' SECOND)as window_startTime,count(*)cnt from system_table " +
"GROUP BY TUMBLE(recordTime, INTERVAL '5' SECOND),systemName";
Table table2 = bsTableEnv.sqlQuery(tumble_cnt);
// table2.printSchema();
// bsTableEnv.toRetractStream(table2, Row.class).print();
// HOP window GROUP BY: the result is an append stream; state is cleaned up automatically when
// the sliding window ends.
// HOP(time_attr, slide interval, window size)
String hop_cnt = "select systemName,count(*)cnt,HOP_START(recordTime, INTERVAL '7' SECOND,INTERVAL '10' SECOND)as window_startTime,HOP_END(recordTime, INTERVAL '7' SECOND,INTERVAL '10' SECOND)as window_endTime from system_table " +
"GROUP BY HOP(recordTime, INTERVAL '7' SECOND,INTERVAL '10' SECOND),systemName";
Table table3 = bsTableEnv.sqlQuery(hop_cnt);
// table3.printSchema();
// bsTableEnv.toRetractStream(table3, Row.class).print();
// SESSION window GROUP BY: the result is an append stream; state is cleaned up automatically
// when the window ends.
// SESSION(time_attr, session gap: if no data for the group arrives within N seconds, the
// window fires and emits its result)
String session_cnt = "select systemName,count(*)cnt,session_START(recordTime, INTERVAL '10' SECOND)as window_startTime,session_END(recordTime, INTERVAL '10' SECOND)as window_endTime from system_table " +
"GROUP BY session(recordTime, INTERVAL '10' SECOND),systemName";
Table table10 = bsTableEnv.sqlQuery(session_cnt);
// table10.printSchema();
// bsTableEnv.toRetractStream(table10, Row.class).print();
// Multi-stream join: the result is an append stream; converts UTC time to standard time.
// Built-in functions: https://help.aliyun.com/knowledge_detail/62769.html?spm=a2c4g.11186631.2.5.4b932834JhFGKV
String joinSql = " select system_table.*,idu_table.iduId,idu_table.iduName, DATE_FORMAT(idu_table.recordTime,'yyyy-MM-dd HH:mm:ss')as eventTime from system_table " +
"inner join idu_table on system_table.systemId=idu_table.systemId and idu_table.recordTime=system_table.recordTime ";
// Fixed (tumbling) window over the join above: number of faults reported per minute by each
// indoor unit under each system.
String tumble_cnt1 = "select systemName,iduName,count(*)cnt,TUMBLE_START(recordTime, INTERVAL '1' MINUTE)as window_startTime,tumble_end(recordTime, INTERVAL '1' MINUTE)as window_endTime from (" + joinSql + ")" +
"GROUP BY TUMBLE(recordTime, INTERVAL '1' MINUTE),systemName,iduName";
Table joinTable = bsTableEnv.sqlQuery(tumble_cnt1);
joinTable.printSchema();
// Create the MySQL dimension table student.
String sqlSource = "CREATE TABLE student (\n" +
" id BIGINT," +
" name VARCHAR," +
" age INT," +
" recordtime TIMESTAMP(3)," +
" route_address VARCHAR," +
" WATERMARK FOR recordtime as recordtime - INTERVAL '5' SECOND " +
") WITH (\n" +
" 'connector.type' = 'jdbc',\n" +
" 'connector.url' = 'jdbc:mysql://192.168.108.140:3306/yuwei',\n" +
" 'connector.table' = 'student',\n" +
" 'connector.driver' = 'com.mysql.jdbc.Driver',\n" +
" 'connector.username' = 'root',\n" +
" 'connector.password' = 'passwd',\n" +
" 'connector.lookup.cache.max-rows' = '5000',\n" +
" 'connector.lookup.cache.ttl' = '1min'\n" +
")";
bsTableEnv.sqlUpdate(sqlSource);
// join mysql
// Reference: https://msd.misuland.com/pd/3053059875815818742
// Temporal table concept: https://ci.apache.org/projects/flink/flink-docs-release-1.10/dev/table/sql/queries.html#aggregations
// cast() can also be used for the conversion.
// Regular join.
// If the dimension-table data changes (and connector.lookup.cache.max-rows=-1), the join
// result here does not change.
// NOTE(review): joinMysql is never passed to sqlQuery in this method.
String joinMysql = "select systemId,systemName,DATE_FORMAT(s_etime,'yyyy-MM-dd HH:mm:ss')AS s_etime,name,route_address,DATE_FORMAT(m_etime,'yyyy-MM-dd HH:mm:ss')AS m_etime from(" +
"select systemId,systemName,to_timestamp(DATE_FORMAT(recordTime,'yyyy-MM-dd HH:mm:ss'))as s_etime from system_table)" +
"inner join (select to_timestamp(DATE_FORMAT(recordtime,'yyyy-MM-dd HH:mm:ss'))as m_etime,id,name,route_address from student) " +
// "FOR SYSTEM_TIME AS OF system_table.proctime " +
"on systemId=id and s_etime>m_etime + INTERVAL '0' HOUR " +
" ";
// Lookup (temporal) dimension-table join: the student rows that are joined are a snapshot of
// the database table at the current moment.
// If the dimension-table data changes (and connector.lookup.cache.max-rows=-1), the join
// result here changes immediately as well.
// If the dimension table holds multiple versions, one stream record yields multiple results.
// FOR SYSTEM_TIME AS OF U.proctime: only processing time (proctime) is supported.
// NOTE(review): joinMysql1 is never passed to sqlQuery in this method.
String joinMysql1 = "select U.*,C.* from " +
" system_table AS U " +
"left join student " +
"FOR SYSTEM_TIME AS OF U.proctime AS C " +
"on U.systemId=C.id " +
" ";
// Temporal table function join.
// Set the time attribute and the primary key of the temporal table.
TemporalTableFunction temporalTableFunction = bsTableEnv.sqlQuery("select * from student")
// event-time attribute
.createTemporalTableFunction("recordtime", "id");
bsTableEnv.registerFunction("studentFunc", temporalTableFunction);
// Stream joined with the temporal table: if student holds several versions per primary key,
// the stream row is joined with the latest version whose time is <= the stream row's event time.
// Even if the dimension table holds multiple versions, one stream record yields exactly one result.
// Note: with event time, inserts/updates in the dimension table are not automatically reflected
// in the student table registered in Flink; only processing time picks them up automatically.
// Join condition: A.systemId=B.id and select *from student as a where a.recordtime=(select max(recordtime) from student as b where a.id=b.id and b.recordtime<=system_table.recordTime);
String sqlTemp = "select A.*,B.* FROM " +
"system_table as A,LATERAL TABLE(studentFunc(A.recordTime)) as B " +
"where A.systemId=B.id";
Table joinMysqltable = bsTableEnv.sqlQuery(sqlTemp);
//joinMysqltable.printSchema();
//bsTableEnv.toAppendStream(joinMysqltable, Row.class).print();
// NOTE(review): no sink or print is attached to any table above before execute is called;
// any exception thrown by execute is only printed to stderr.
try {
bsTableEnv.execute("job");
} catch (Exception e) {
e.printStackTrace();
}
}
}
時態表關聯
/*
* www.unisinsight.com Inc.
* Copyright (c) 2018 All Rights Reserved
*/
package com.test.flink.service.sql.join_mysql;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.functions.TemporalTableFunction;
import org.apache.flink.types.Row;
/**
 * Demo job that joins the Kafka-backed system_table with a temporal table function built on
 * the JDBC-backed student table, using {@code LATERAL TABLE(studentFunc(...))}, and prints the
 * joined rows as an append stream.
 *
 * <p>Background on the time notions (author's notes):
 * Processing time is the system time of the machine executing the operation (also called
 * "wall-clock time").
 * Event time means processing stream data based on the timestamp attached to each row; the
 * timestamp can be encoded when the event occurs.
 * Ingestion time is the time an event enters Flink; internally it is handled similarly to
 * event time.
 *
 * @author yuwei [[email protected]]
 * @date 2020/03/16 14:33
 * @since 1.0
 */
public class LATERAL_TABLE_join {
/**
 * Registers both tables, creates and registers the temporal table function, and runs the join.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
StreamExecutionEnvironment bsEnv = StreamExecutionEnvironment.getExecutionEnvironment();
// Author's note: this must be set, otherwise SQL that relies on event time produces no output.
bsEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
EnvironmentSettings bsSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
StreamTableEnvironment bsTableEnv = StreamTableEnvironment.create(bsEnv, bsSettings);
// DDL for the Kafka topic 'system' with a processing-time column (proctime) and an event-time
// watermark on recordTime that lags 5 seconds.
String systemSource = "CREATE TABLE system_table (\n" +
" systemId BIGINT,\n" +
" systemName VARCHAR,\n" +
" recordTime TIMESTAMP(3),\n" +
" proctime as PROCTIME(),\n" +
" WATERMARK FOR recordTime as recordTime - INTERVAL '5' SECOND " +
") WITH (\n" +
" 'connector.type' = 'kafka',\n" +
" 'connector.version' = 'universal',\n" +
" 'connector.topic' = 'system',\n" +
" 'connector.startup-mode' = 'latest-offset',\n" +
" 'connector.properties.zookeeper.connect' = 'cdhslave02.unisinsight.com:2181',\n" +
" 'connector.properties.bootstrap.servers' = 'cdhslave02.unisinsight.com:9092',\n" +
" 'format.type' = 'json',\n" +
" 'format.derive-schema' = 'true' " +
")";
bsTableEnv.sqlUpdate(systemSource);
// DDL for the JDBC table student; declares its own processing-time column proctime1 in
// addition to the watermark on recordtime.
String sqlSource = "CREATE TABLE student (\n" +
" id BIGINT," +
" name VARCHAR," +
" age INT," +
" recordtime TIMESTAMP(3)," +
" route_address VARCHAR," +
" proctime1 as PROCTIME(),\n" +
" WATERMARK FOR recordtime as recordtime - INTERVAL '5' SECOND " +
") WITH (\n" +
" 'connector.type' = 'jdbc',\n" +
" 'connector.url' = 'jdbc:mysql://192.168.108.140:3306/yuwei',\n" +
" 'connector.table' = 'student',\n" +
" 'connector.driver' = 'com.mysql.jdbc.Driver',\n" +
" 'connector.username' = 'root',\n" +
" 'connector.password' = 'passwd',\n" +
" 'connector.lookup.cache.max-rows' = '100',\n" +
" 'connector.lookup.cache.ttl' = '600second'\n" +
")";
bsTableEnv.sqlUpdate(sqlSource);
// Temporal table function join.
// Set the time attribute and the primary key of the temporal table.
// DataStream<student> dataStream = bsTableEnv.toAppendStream(bsTableEnv.sqlQuery("select id,name,age,recordtime,route_address from student"),Row.class)
// .map(row -> new student());
// Table table = bsTableEnv.fromDataStream(dataStream, "id,name,age,recordtime,route_address,my_proctime.proctime");
Table table =bsTableEnv.sqlQuery("select id,name,age,recordtime,proctime1 from student");
table.printSchema();
// The temporal table function is keyed by id and versioned by the processing-time column proctime1.
TemporalTableFunction temporalTableFunction = table.createTemporalTableFunction("proctime1", "id");
bsTableEnv.registerFunction("studentFunc", temporalTableFunction);
// Stream joined with the temporal table: if student holds several versions per primary key,
// the stream row is joined with the latest version whose time is <= the stream row's event time.
// Even if the dimension table holds multiple versions, one stream record yields exactly one result.
// Note: with event time, inserts/updates in the dimension table are not automatically reflected
// in the student table registered in Flink.
// Join condition: A.systemId=B.id and select *from student as a where a.recordtime=(select max(recordtime) from student as b where a.id=b.id and b.recordtime<=system_table.recordTime);
// NOTE(review): the notes above describe event-time semantics, but this query passes the
// processing-time column (proctime) to studentFunc, which was created on proctime1 - confirm intent.
String sqlTemp="select A.*,B.* FROM " +
"system_table as A,LATERAL TABLE(studentFunc(proctime)) as B " +
"where A.systemId=B.id";
Table joinMysqltable = bsTableEnv.sqlQuery(sqlTemp);
joinMysqltable.printSchema();
bsTableEnv.toAppendStream(joinMysqltable, Row.class).print();
// Any exception thrown by execute is only printed to stderr.
try {
bsTableEnv.execute("job");
} catch (Exception e) {
e.printStackTrace();
}
}
}
臨時維表關聯
/*
* www.unisinsight.com Inc.
* Copyright (c) 2018 All Rights Reserved
*/
package com.test.flink.service.sql.join_mysql;
import com.test.flink.udx.CompareCol;
import com.test.flink.udx.SplitFun;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
/**
 * Demo job for lookup joins ({@code FOR SYSTEM_TIME AS OF <proctime>}) between the Kafka-backed
 * system_table and JDBC-backed tables (student, config, config_tb).
 *
 * <p>Several query variants are constructed; only {@code joinMysql4} is turned into a table,
 * whose schema is printed and whose rows are printed as a retract stream. It filters the joined
 * rows with values parsed out of config_tb.config_info and with the {@code compareCol} UDF.
 *
 * @author yuwei [[email protected]]
 * @date 2020/03/16 14:27
 * @since 1.0
 */
public class SYSTEM_TIME_join {
/**
 * Registers the tables and UDFs, builds the lookup-join queries and executes the job.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
StreamExecutionEnvironment bsEnv = StreamExecutionEnvironment.getExecutionEnvironment();
// Author's note: this must be set, otherwise SQL that relies on event time produces no output.
bsEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
System.out.println(SYSTEM_TIME_join.class.getName());
EnvironmentSettings bsSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
StreamTableEnvironment bsTableEnv = StreamTableEnvironment.create(bsEnv, bsSettings);
// DDL for the Kafka topic 'system' with a processing-time column (proctime) and an event-time
// watermark on recordTime that lags 5 seconds.
String systemSource = "CREATE TABLE system_table (\n" +
" systemId BIGINT,\n" +
" systemName VARCHAR,\n" +
" recordTime TIMESTAMP(3),\n" +
" proctime as PROCTIME(),\n" +
" WATERMARK FOR recordTime as recordTime - INTERVAL '5' SECOND " +
") WITH (\n" +
" 'connector.type' = 'kafka',\n" +
" 'connector.version' = 'universal',\n" +
" 'connector.topic' = 'system',\n" +
" 'connector.startup-mode' = 'latest-offset',\n" +
" 'connector.properties.zookeeper.connect' = 'cdhslave02.unisinsight.com:2181',\n" +
" 'connector.properties.bootstrap.servers' = 'cdhslave02.unisinsight.com:9092',\n" +
" 'format.type' = 'json',\n" +
" 'format.derive-schema' = 'true' " +
")";
bsTableEnv.sqlUpdate(systemSource);
// DDL for the JDBC table student (no time attributes declared in this variant).
String sqlSource = "CREATE TABLE student (\n" +
" id BIGINT," +
" name VARCHAR," +
" age INT," +
" recordtime TIMESTAMP(3)," +
" route_address VARCHAR" +
") WITH (\n" +
" 'connector.type' = 'jdbc',\n" +
" 'connector.url' = 'jdbc:mysql://192.168.108.140:3306/yuwei',\n" +
" 'connector.table' = 'student',\n" +
" 'connector.driver' = 'com.mysql.jdbc.Driver',\n" +
" 'connector.username' = 'root',\n" +
" 'connector.password' = 'passwd',\n" +
" 'connector.lookup.cache.max-rows' = '-1',\n" +
" 'connector.lookup.cache.ttl' = '600second'\n" +
")";
bsTableEnv.sqlUpdate(sqlSource);
// Lookup (temporal) dimension-table join: the student rows that are joined are a snapshot of
// the database table at the current moment.
// If the dimension-table data changes (and connector.lookup.cache.max-rows=-1), the join
// result here changes immediately as well.
// If the dimension table holds multiple versions, one stream record yields multiple results.
// FOR SYSTEM_TIME AS OF U.proctime: only processing time (proctime) is supported.
// In non-windowed mode each incoming record triggers one computation; only equi-joins are supported.
// NOTE(review): joinMysql1 is never passed to sqlQuery in this method.
String joinMysql1 = "select U.*,C.* from " +
" system_table AS U " +
"left join student " +
"FOR SYSTEM_TIME AS OF U.proctime AS C " +
"on U.systemId=C.id " +
" ";
// Same lookup join, wrapped in a row_number() query that keeps the row with the latest
// recordtime per id. Only referenced by the commented-out code below.
String joinMysql2 = "select * from (select *,row_number() over(partition by id order by recordtime desc)rn" +
" from (select U.*,C.* from " +
" system_table AS U " +
"left join student " +
"FOR SYSTEM_TIME AS OF U.proctime AS C " +
"on U.systemId=C.id " +
"))where rn=1 ";
//
// Table joinMysqltable = bsTableEnv.sqlQuery(joinMysql2);
// joinMysqltable.printSchema();
// bsTableEnv.toRetractStream(joinMysqltable, Row.class).print();
// Dynamic parameter configuration table.
String config = "CREATE TABLE config (\n" +
" systemId BIGINT," +
" route_address VARCHAR," +
" update_time TIMESTAMP(3)," +
" jobId VARCHAR" +
") WITH (\n" +
" 'connector.type' = 'jdbc',\n" +
" 'connector.url' = 'jdbc:mysql://192.168.108.140:3306/yuwei',\n" +
" 'connector.table' = 'config1',\n" +
" 'connector.driver' = 'com.mysql.jdbc.Driver',\n" +
" 'connector.username' = 'root',\n" +
" 'connector.password' = 'passwd',\n" +
" 'connector.lookup.cache.max-rows' = '-1',\n" +
" 'connector.lookup.cache.ttl' = '600second'\n" +
")";
bsTableEnv.sqlUpdate(config);
// Approach 1: join directly against a table shaped like config; changing a parameter value
// directly affects the result.
// NOTE(review): joinMysql3 is never passed to sqlQuery in this method.
String joinMysql3 = "select A.* from (select system_table.*,'123' as jobId,student.* from " +
" system_table " +
"left join student " +
"FOR SYSTEM_TIME AS OF system_table.proctime " +
"on system_table.systemId=student.id) as A " +
"left join config " +
" FOR SYSTEM_TIME AS OF A.proctime " +
"on A.jobId=config.jobId " +
" where A.systemId>=config.systemId and A.route_address like config.route_address ";
// Configuration table keyed by job main class name; the parameters live in the config_info string column.
String config_tb = "CREATE TABLE config_tb (\n" +
" jobId VARCHAR," +
" job_main_class_name VARCHAR," +
" update_time TIMESTAMP(3)," +
" config_info VARCHAR" +
") WITH (\n" +
" 'connector.type' = 'jdbc',\n" +
" 'connector.url' = 'jdbc:mysql://192.168.108.140:3306/yuwei',\n" +
" 'connector.table' = 'config_tb',\n" +
" 'connector.driver' = 'com.mysql.jdbc.Driver',\n" +
" 'connector.username' = 'root',\n" +
" 'connector.password' = 'passwd',\n" +
" 'connector.lookup.cache.max-rows' = '-1',\n" +
" 'connector.lookup.cache.ttl' = '600second'\n" +
")";
bsTableEnv.sqlUpdate(config_tb);
// create udtf function
bsTableEnv.registerFunction("splitFun", new SplitFun());
//bsTableEnv.sqlUpdate("CREATE TEMPORARY FUNCTION IF NOT EXISTS splitFun AS 'com.test.flink.udx.SplitFun' LANGUAGE JAVA");
// Expands config_info into (systemId, route_address) columns through the splitFun UDTF.
// NOTE(review): realConfigSql is never passed to sqlQuery in this method.
String realConfigSql = "select A.jobId,A.job_main_class_name,A.update_time,T.systemId,T.route_address" +
" from config_tb as A," +
" lateral table(splitFun(A.config_info)) as T(systemId,route_address)";
String jobMainClassName = SYSTEM_TIME_join.class.getName();
// Approach 2: join against a table shaped like config_tb as a temporal (lookup) table; changing
// a parameter value takes effect immediately, and a UDF handles the more complex filter conditions.
bsTableEnv.registerFunction("compareCol",new CompareCol());
// The job's own class name is embedded as a literal column and used as the lookup key into
// config_tb; config_systemId / config_route_address are parsed out of config_info with
// SPLIT_INDEX / REPLACE.
String joinMysql4 = "select *from (select A.*,REPLACE(SPLIT_INDEX(SPLIT_INDEX(config_info,',',0),':',1),'\"','') as config_systemId," +
" REPLACE(REPLACE(SPLIT_INDEX(SPLIT_INDEX(config_tb.config_info,',',1),':',1),'\"',''),'}','') as config_route_address from (select system_table.*,'" + jobMainClassName + "' as jobMainClassName,student.* from " +
" system_table " +
"left join student " +
"FOR SYSTEM_TIME AS OF system_table.proctime " +
"on system_table.systemId=student.id) as A " +
"left join config_tb " +
"FOR SYSTEM_TIME AS OF A.proctime " +
"on A.jobMainClassName=config_tb.job_main_class_name) " +
"where systemId>=config_systemId and compareCol(route_address,config_route_address)>0";
// "and A.systemId>= 2 ";
// "and A.route_address like %輕軌站% ";
Table joinMysqltable = bsTableEnv.sqlQuery(joinMysql4);
joinMysqltable.printSchema();
// NOTE(review): the value returned by explain is discarded here.
bsTableEnv.explain(joinMysqltable);
bsTableEnv.toRetractStream(joinMysqltable, Row.class).print();
// Runs the job under the class's canonical name and prints the job id once execute returns;
// any exception is only printed to stderr.
try {
JobExecutionResult job = bsTableEnv.execute(SYSTEM_TIME_join.class.getCanonicalName());
System.out.println("jobId:" + job.getJobID());
} catch (Exception e) {
e.printStackTrace();
}
}
}
事件時間排序
/*
* www.unisinsight.com Inc.
* Copyright (c) 2018 All Rights Reserved
*/
package com.test.flink;
/**
* description
*
* @author yuwei [[email protected]]
* @date 2019/12/19 14:11
* @since 1.0
*/
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import java.time.Instant;
import java.util.Random;
/**
 * Sorts an out-of-order stream by event time and watermarks using the Table / SQL API.
 *
 * <p>Created by zhisheng on 2019-06-14<br>
 * blog: http://www.54tianzhisheng.cn/<br>
 * WeChat official account: zhisheng
 */
public class Sort {

    /** Upper bound (exclusive), in milliseconds, of the random jitter added to each event time. */
    public static final int OUT_OF_ORDERNESS = 1000;

    /**
     * Wires the synthetic source into a table with a rowtime attribute, orders it by that
     * attribute via SQL, prints the result and the execution plan, and runs the job.
     *
     * @param args command-line arguments; not read by this method
     * @throws Exception if building the plan or executing the job fails
     */
    public static void main(String[] args) throws Exception {
        // StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // StreamTableEnvironment bsTableEnv = StreamTableEnvironment.create(env);
        StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
        EnvironmentSettings plannerSettings = EnvironmentSettings.newInstance()
                .useBlinkPlanner()
                .inStreamingMode()
                .build();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(streamEnv, plannerSettings);

        streamEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        streamEnv.setParallelism(1);

        // Generate events, then attach timestamps and bounded-out-of-orderness watermarks.
        DataStream<Event> rawEvents = streamEnv.addSource(new OutOfOrderEventSource());
        SingleOutputStreamOperator<Event> timestampedEvents =
                rawEvents.assignTimestampsAndWatermarks(new TimestampsAndWatermarks());

        Table eventsTable = tableEnv.fromDataStream(timestampedEvents, "eventTime.rowtime");
        tableEnv.registerTable("zhisheng", eventsTable);

        Table ordered = tableEnv.sqlQuery("select eventTime from zhisheng order by eventTime");
        tableEnv.toAppendStream(ordered, Row.class).print();

        // Print the execution plan before submitting the job.
        System.out.println(streamEnv.getExecutionPlan());
        streamEnv.execute("sort-streaming-data");
    }

    /** Event carrying only an event-time timestamp in epoch milliseconds. */
    public static class Event {
        Long eventTime;

        /** Creates an event stamped with "now" plus a random jitter, so events arrive out of order. */
        Event() {
            long nowMillis = Instant.now().toEpochMilli();
            int jitterMillis = new Random().nextInt(OUT_OF_ORDERNESS);
            this.eventTime = nowMillis + jitterMillis;
        }

        @Override
        public String toString() {
            return "Event{eventTime=" + eventTime + "}";
        }
    }

    /**
     * Data source that keeps producing events until it is cancelled.
     */
    private static class OutOfOrderEventSource extends RichSourceFunction<Event> {
        private volatile boolean active = true;

        @Override
        public void run(SourceContext<Event> ctx) throws Exception {
            while (active) {
                ctx.collect(new Event());
                Thread.sleep(1);
            }
        }

        @Override
        public void cancel() {
            active = false;
        }
    }

    /**
     * Timestamp extractor / watermark generator that tolerates {@link #OUT_OF_ORDERNESS}
     * milliseconds of lateness.
     */
    private static class TimestampsAndWatermarks extends BoundedOutOfOrdernessTimestampExtractor<Event> {
        public TimestampsAndWatermarks() {
            super(Time.milliseconds(OUT_OF_ORDERNESS));
        }

        @Override
        public long extractTimestamp(Event element) {
            return element.eventTime;
        }
    }
}
工具類
SplitFun
/*
* www.unisinsight.com Inc.
* Copyright (c) 2018 All Rights Reserved
*/
package com.test.flink.udx;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.table.functions.TableFunction;
/**
 * Table function (UDTF) that parses a flat two-entry config string such as
 * {@code {"systemId": "2", "route_address": "%輕軌站%"}} and emits a single
 * {@code (systemId, route_address)} row.
 *
 * <p>Parsing is positional and purely textual: braces, double quotes and spaces are stripped,
 * the remainder is split on {@code ','}, the first entry's value is parsed as a long and the
 * second entry's value is taken as a string. Values must therefore not contain commas.
 *
 * @author yuwei [[email protected]]
 * @date 2020/03/19 14:04
 * @since 1.0
 */
public class SplitFun extends TableFunction<Tuple2<Long, String>> {

    /**
     * Parses {@code str} and collects one row, or no row when the input is null or blank.
     *
     * @param str the raw config string; may be null (e.g. a SQL NULL column value)
     * @throws IllegalArgumentException if the input does not contain two {@code key:value}
     *         entries, or the first value is not a valid long
     */
    public void eval(String str) {
        // A NULL or blank column value carries no configuration: emit nothing instead of failing.
        if (str == null || str.trim().isEmpty()) {
            return;
        }
        // Literal replacements; none of these characters need regex semantics.
        String cleaned = str.replace("{", "")
                .replace("}", "")
                .replace("\"", "")
                .replace(" ", "");
        String[] entries = cleaned.split(",");
        if (entries.length < 2) {
            throw new IllegalArgumentException("Expected two key:value entries but got: " + str);
        }
        long first = Long.parseLong(valueOf(entries[0], str));
        String second = valueOf(entries[1], str);
        System.out.println("first:" + first + ",second:" + second);
        collect(Tuple2.of(first, second));
    }

    /** Returns the part after the first ':' of a key:value entry, keeping any further colons. */
    private static String valueOf(String entry, String raw) {
        // Limit 2 so a value that itself contains ':' is not truncated.
        String[] keyValue = entry.split(":", 2);
        if (keyValue.length < 2) {
            throw new IllegalArgumentException("Missing ':' in entry '" + entry + "' of: " + raw);
        }
        return keyValue[1];
    }
}
CompareCol
/*
* www.unisinsight.com Inc.
* Copyright (c) 2018 All Rights Reserved
*/
package com.test.flink.udx;
import cn.hutool.core.util.ObjectUtil;
import org.apache.flink.table.functions.ScalarFunction;
/**
 * Scalar function (UDF) that reports whether one string contains another, ignoring leading and
 * trailing whitespace on both.
 *
 * @author yuwei [[email protected]]
 * @date 2020/03/19 18:28
 * @since 1.0
 */
public class CompareCol extends ScalarFunction {

    /**
     * Checks whether {@code b} contains {@code c}.
     *
     * @param b the string to search in; null or empty yields 0
     * @param c the string to search for; null yields 0
     * @return 1 if trimmed {@code b} contains trimmed {@code c}, otherwise 0
     */
    public Integer eval(String b, String c) {
        // Either side may be a SQL NULL (e.g. unmatched left-join rows): treat that as "no match"
        // rather than throwing a NullPointerException from c.trim().
        if (ObjectUtil.isEmpty(b) || c == null) {
            return 0;
        }
        return b.trim().contains(c.trim()) ? 1 : 0;
    }
}