Flink: Reading Data from Kafka into a MySQL Sink

In the previous article we showed how to implement a custom Source by extending RichSourceFunction. In this one we continue with the connector side: how to read source data through a connector and write it into another data store. The idea is that Flink acts as a consumer of a Kafka topic and inserts the consumed records into MySQL in real time (the same pattern applies to HBase). The data simulates website click logs with four fields: city, loginTime (login time), os (operating system), and phoneName (phone model). Let's get started.
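
Each Kafka message is simply the Entity defined below, serialized to JSON with fastjson. For illustration (the field values here are made up), a single record on the topic looks roughly like this:

{"city":"北京","loginTime":"2019-04-13 14:28:00","os":"Android 7.0","phoneName":"HUAWEI"}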

1. Add Maven Dependencies

<dependencies>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.46</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>1.7.2</version>
            <!-- provided scope: needed at compile time but not bundled at runtime, so comment it out for local testing -->
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>1.7.2</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
            <version>1.7.2</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.51</version>
        </dependency>
</dependencies>

2. The Entity Class

This class wraps the records received from Kafka. The benefits of this approach were already explained in 《Flink自定義DataSource之MysqlSource》, so we won't repeat them here.

package com.xpu.kafkatomysql;

/**
 * Entity class wrapping one click-log record
 * create by xiax.xpu on @Date 2019/4/13 12:14
 */
public class Entity {
    public String phoneName;
    public String os;
    public String city;
    public String loginTime;

    public Entity() {
    }

    public Entity(String phoneName, String os, String city, String loginTime) {
        this.phoneName = phoneName;
        this.os = os;
        this.city = city;
        this.loginTime = loginTime;
    }

    @Override
    public String toString() {
        return "Entity{" +
                "phoneName='" + phoneName + '\'' +
                ", os='" + os + '\'' +
                ", city='" + city + '\'' +
                ", loginTime='" + loginTime + '\'' +
                '}';
    }

    public String getPhoneName() {
        return phoneName;
    }

    public void setPhoneName(String phoneName) {
        this.phoneName = phoneName;
    }

    public String getOs() {
        return os;
    }

    public void setOs(String os) {
        this.os = os;
    }

    public String getCity() {
        return city;
    }

    public void setCity(String city) {
        this.city = city;
    }

    public String getLoginTime() {
        return loginTime;
    }

    public void setLoginTime(String loginTime) {
        this.loginTime = loginTime;
    }
}
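
As a quick sanity check on the getters, setters, and no-arg constructor that fastjson relies on, the class round-trips through JSON as sketched below (a standalone snippet; the class name EntityJsonCheck is only for illustration):

package com.xpu.kafkatomysql;

import com.alibaba.fastjson.JSON;

public class EntityJsonCheck {
    public static void main(String[] args) {
        Entity entity = new Entity("HUAWEI", "Android 7.0", "北京", "2019-04-13 12:14:00");
        //serialization reads the getters
        String json = JSON.toJSONString(entity);
        //deserialization uses the no-arg constructor and the setters
        Entity parsed = JSON.parseObject(json, Entity.class);
        System.out.println(json);
        System.out.println(parsed);
    }
}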

3. Simulating a Kafka Producer

Here we generate mock records in a loop to simulate the click-log data described above. The code is as follows:

package com.xpu.kafkatomysql;
import com.alibaba.fastjson.JSON;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Properties;

/**
 * Producer code that sends mock data from Java to a Kafka topic
 *
 * create by xiax.xpu on @Date 2019/4/13 14:28
 */
public class KafkaCreateData {
    public static final String topic = "kafka_flink_mysql";
    public static String brokerList = "192.168.83.129:9092";
    public static void createData(){
        Entity entity = new Entity();
        Properties props = new Properties();
        //Kafka connection settings
        props.put("bootstrap.servers",brokerList);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); //key serializer
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); //value serializer
        props.put("acks", "1");
        KafkaProducer<String, String> producer = new KafkaProducer<>(props);

        //phone models
        String[] phoneArray = {"iPhone", "HUAWEI", "xiaomi", "moto", "vivo"};
        //operating systems
        String[] osArray = {"Android 7.0", "Mac OS", "Apple Kernel", "Windows", "kylin OS", "chrome"};
        //cities
        String[] cityArray = {"北京", "上海", "杭州", "南京", "西藏", "西安", "合肥", "葫蘆島"};
        //pick a random phone model
        int k = (int) (Math.random() * phoneArray.length);
        String phoneName = phoneArray[k];
        //pick a random OS
        int m = (int) (Math.random() * osArray.length);
        String os = osArray[m];
        //pick a random city
        int n = (int) (Math.random() * cityArray.length);
        String city = cityArray[n];
        //use the current time as the login timestamp
        SimpleDateFormat sf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        String loginTime = sf.format(new Date());
        //load the values into the entity
        entity.setCity(city);
        entity.setLoginTime(loginTime);
        entity.setOs(os);
        entity.setPhoneName(phoneName);
        ProducerRecord<String, String> record = new ProducerRecord<>(topic, JSON.toJSONString(entity));
        producer.send(record);
        System.out.println("Sent record: " + JSON.toJSONString(entity));
    }

    public static void main(String[] args) throws  InterruptedException{
        while (true){
            createData();
            Thread.sleep(500);
        }
    }
}

Screenshot: the Kafka producer test running in IDEA.

Verify the topic on the Linux side with the Kafka console consumer:

./kafka-console-consumer.sh --bootstrap-server 192.168.83.129:9092 --topic kafka_flink_mysql

4. Custom MysqlSink Class

In this part we implement a custom MysqlSink by extending RichSinkFunction and overriding open(), invoke(), and close() to obtain the database connection and insert each record into the target table.

package com.xpu.kafkatomysql;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

/**
 * create by xiax.xpu on @Date 2019/4/13 14:38
 */
public class MysqlSink extends RichSinkFunction<Entity> {
    private PreparedStatement ps=null;
    private Connection connection=null;
    String driver = "com.mysql.jdbc.Driver";
    String url = "jdbc:mysql://192.168.0.102:3306/flinktest?useUnicode=true&characterEncoding=UTF-8";
    String username  = "sqoopuser";
    String password = "sqoopuser";

    /**
     * open() establishes the database connection once,
     * so we do not have to open and release a connection on every invoke()
     * @param parameters
     * @throws Exception
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        //load the JDBC driver
        Class.forName(driver);
        //create the connection
        connection = DriverManager.getConnection(url,username,password);
        String sql = "insert into web_access (city,loginTime,os,phoneName) values (?,?,?,?);";
        ps = connection.prepareStatement(sql);
    }

    /**
     * invoke() is called once for every record to be inserted
     * @param value
     * @param context
     * @throws Exception
     */
    @Override
    public void invoke(Entity value, Context context) throws Exception {

        ps.setString(1,value.city);
        ps.setString(2,value.loginTime);
        ps.setString(3,value.os);
        ps.setString(4,value.phoneName);
        System.out.println("insert into web_access (city,loginTime,os,phoneName) values (" + value.city + "," + value.loginTime + "," + value.os + "," + value.phoneName + ")");
        ps.executeUpdate();
    }
    @Override
    public void close() throws Exception {
        super.close();
        //close the statement first, then the connection
        if (ps != null){
            ps.close();
        }
        if(connection != null){
            connection.close();
        }
    }
}
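
The insert statement above assumes a web_access table already exists in the flinktest database. The original post does not show its definition; a minimal schema matching the four columns might look like the following (the column types and the id column are assumptions):

CREATE TABLE web_access (
    id        INT AUTO_INCREMENT PRIMARY KEY,
    city      VARCHAR(50),
    loginTime VARCHAR(50),
    os        VARCHAR(100),
    phoneName VARCHAR(50)
) DEFAULT CHARSET=utf8;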

5. Flink Entry-Point Program

package com.xpu.kafkatomysql;

import com.alibaba.fastjson.JSON;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import java.util.Properties;

/**
 * create by xiax.xpu on @Date 2019/4/13 14:50
 */
public class FlinkSubmitter {
    public static void main(String[] args) throws Exception{
        //get the streaming execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        //Kafka consumer configuration
        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.83.129:9092");
        props.put("zookeeper.connect","192.168.83.129:2181");
        props.put("group.id", "flink_kafka_mysql"); //consumer group id, required by the Kafka consumer; any group name works
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");  //key deserializer
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); //value deserializer
        //We use the 011 connector here; unlike the 09 and 010 connectors, 011 supports exactly-once semantics
        SingleOutputStreamOperator<Entity> streamRecord = env.addSource(new FlinkKafkaConsumer011<>(
                "kafka_flink_mysql",
                 new SimpleStringSchema(), //deserialize each Kafka record as a String
                 props)).map(string -> JSON.parseObject(string, Entity.class)).setParallelism(1);

        streamRecord.addSink(new MysqlSink());

        env.execute("KafkatoMysql");

    }
}

6. Testing

Data sent by the Kafka producer:

Data MysqlSink is about to insert after obtaining the connection:

Rows in the MySQL table:
