Flink sql 基於hbase,mysql的維表實戰 -未完

目前需要用到維表數據,所以選擇hbase和mysql作爲維表數據存儲,目前主要考慮參考官網。

不知道需要引入哪些依賴的,可以去參考官網,或者參考別人的代碼。

還有此人的博客地址(我稱之爲白斬雞兄),典型的乾貨多博客不火。

https://blog.csdn.net/weixin_47482194/article/details/105854970

github的傳送門:https://github.com/lonelyGhostisdog/flinksql

大概率這個文章也是參考官網跟他的文章寫的,當然都得自己去實踐的,不實踐怎麼發現問題學習呢?

1)sql加載kafka的數據

// Streaming environment with a single parallel task for the demo.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);

// Table environment backed by the Blink planner in streaming mode.
EnvironmentSettings settings = EnvironmentSettings.newInstance()
        .useBlinkPlanner()
        .inStreamingMode()
        .build();
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, settings);

// DDL for the Kafka-backed source table. `proctime` is a computed
// processing-time attribute (required later for temporal/lookup joins).
String ddlSource =
        "CREATE TABLE source_kafka (\n"
        + "    order_key STRING ,"
        + "    order_number STRING ,"
        + "    company_code STRING , "
        + "    ts BIGINT,"
        + "    proctime AS PROCTIME()  "
        + ") WITH (\n"
        + "    'connector.type' = 'kafka',\n"
        + "    'connector.version' = '0.11',\n"
        + "    'connector.topic' = 'test_window_01',\n"
        + "    'connector.startup-mode' = 'earliest-offset',\n"
        + "    'connector.properties.zookeeper.connect' = 'xxx:2181',\n"
        + "    'connector.properties.bootstrap.servers' = 'xxx:9092',\n"
        + "    'format.type' = 'json'\n"
        + ")";

// Register the table in the catalog.
tableEnv.sqlUpdate(ddlSource);

2)讀取mysql維表的基礎代碼

// DDL for a JDBC (MySQL) dimension table with lookup caching enabled.
String dimDDL = ""
        + "CREATE TABLE dim_mysql ( "
        + "    aaa BIGINT, "
        + "    bbbb STRING , "
        + "    ccc STRING "
        + ") WITH ( "
        + "    'connector.type' = 'jdbc', "
        + "    'connector.url' = 'jdbc:mysql://xxx:3306/test', "
        + "    'connector.table' = 'test', "
        + "    'connector.driver' = 'com.mysql.jdbc.Driver', "
        + "    'connector.username' = 'root', "
        // FIX: the comma after the password option was missing, which made the
        // generated WITH-clause invalid SQL and failed the DDL parse.
        + "    'connector.password' = '12345678',  "
        + "   'connector.lookup.cache.max-rows' = '5000',  " // max rows kept in the lookup cache
        + "   'connector.lookup.cache.ttl' = '360s',  "  // cache TTL; entries are refreshed after expiry — tune to the dim table's update frequency
        + "   'connector.lookup.max-retries' = '3'  " // retries on a failed lookup before giving up
        + ")";
blinkStreamTableEnv.sqlUpdate(dimDDL);

// Simple probe query: the JDBC dimension table CAN be scanned directly.
String querySql = "select  * from dim_mysql ";

 

3)hbase表join 效果:

/**
 * Demo job: joins a Kafka-backed stream table against an HBase dimension
 * table using a processing-time temporal (lookup) join, then prints the
 * retract stream of the join result.
 */
public class Dw_count_user_all_bak {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Blink planner in streaming mode.
        EnvironmentSettings blinkStreamSettings = EnvironmentSettings.newInstance()
                .useBlinkPlanner()
                .inStreamingMode()
                .build();

        StreamTableEnvironment blinkStreamTableEnv = StreamTableEnvironment.create(env, blinkStreamSettings);

        // One-shot test source emitting a single two-field Row ("a", "a").
        // NOTE(review): left in from experimentation — it is never registered as a
        // table below (the commented-out createTemporaryView call was its consumer).
        SingleOutputStreamOperator<Row> ds = env.addSource(new RichSourceFunction<Row>() {
            @Override
            public void run(SourceContext<Row> ctx) throws Exception {
                Row r = new Row(2);
                r.setField(0, "a");
                r.setField(1, "a");
                ctx.collect(r);
            }

            @Override
            public void cancel() {
                // Nothing to cancel: run() emits once and returns.
            }
        }).returns(Types.ROW(Types.STRING, Types.STRING));

//        blinkStreamTableEnv.createTemporaryView("t",ds,"id,order_key,proctime.proctime");

        // Kafka source table. `proctime` is the processing-time attribute the
        // temporal join below keys off (`FOR SYSTEM_TIME AS OF a.proctime`).
        String ddlSource = "CREATE TABLE t (\n" +
                "    order_key STRING ," +
                "    order_number STRING ," +
                "    company_code STRING , " +
                "    ts BIGINT," +
                "    proctime AS PROCTIME()  " +
                ") WITH (" +
                "    'connector.type' = 'kafka'," +
                "    'connector.version' = '0.11'," +
                "    'connector.topic' = 'test_window_01'," +
                "    'connector.startup-mode' = 'earliest-offset'," +
                "    'connector.properties.zookeeper.connect' = 'xxx:2181'," +
                "    'connector.properties.bootstrap.servers' = 'xxx:9092'," +
                "    'format.type' = 'json'" +
                ")";
        blinkStreamTableEnv.sqlUpdate(ddlSource);

        // HBase dimension table: row key plus one column family `cf` holding column `aaa`.
        String hbaseDDL = "" +
                "CREATE TABLE dim_hbase ( " +
                "rowkey String, " +
                "cf ROW<aaa String> " +
                ") WITH ( " +
                "  'connector.type' = 'hbase'," +
                "  'connector.version' = '1.4.3', " +
                "  'connector.table-name' = 'test'," +
                "  'connector.zookeeper.quorum' = 'xxx:2181,xxx:2181,xxx:2181'," +
                "  'connector.zookeeper.znode.parent' = '/hbase' " +
                ")";
        System.out.println("hbaseDDL = " + hbaseDDL);
        blinkStreamTableEnv.sqlUpdate(hbaseDDL);

        // NOTE: unlike the JDBC dim table, a direct `select * from dim_hbase`
        // scan fails with this connector setup (kept below as the failing probe);
        // the table is only usable as a lookup-join target.
//        String queryHbase = "select * from dim_hbase";
//        Table hbaseTable  = blinkStreamTableEnv.sqlQuery(queryHbase);
//        blinkStreamTableEnv.toAppendStream(hbaseTable,Row.class).print();

        // Temporal join: Kafka stream LEFT JOIN HBase dim table as of processing time,
        // matching the stream's order_key against the HBase row key.
        Table table2 = blinkStreamTableEnv.sqlQuery("select a.* ,b.* from t a left " +
                " join  dim_hbase FOR SYSTEM_TIME AS OF a.proctime AS b on a.order_key = b.rowkey");
        blinkStreamTableEnv.toRetractStream(table2,Row.class).print();

        // FIX: use this class's own name for the job. The original referenced
        // Dw_count_user_all.class, which only compiles if that unrelated class exists.
        blinkStreamTableEnv.execute(Dw_count_user_all_bak.class.getSimpleName());
    }
}

 

4)總結的話:

我們在創建mysqlDDL的時候,是可以通過select * 查詢這個表

我們在創建hbaseDDL的時候,是不可以直接通過 select * 全表查詢這個表的,執行會直接報錯——這是個坑,hbase維表只能用於lookup join,記得就好。

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章