Integrating Kafka with Flink

1. Environment Preparation

1. Kafka Cluster Environment

1. Prepare and start a Kafka cluster

Kafka 3.6.1 cluster installation and deployment

2. Create a topic named first

/usr/kafka/kafka_2.13-3.6.1/bin/kafka-topics.sh --bootstrap-server 192.168.58.130:9092 --create --partitions 1 --replication-factor 3 --topic first
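The same topic can also be created and verified programmatically with the Kafka AdminClient. Below is a minimal sketch, assuming the kafka-clients library (pulled in transitively by flink-connector-kafka) is on the classpath; the class name is illustrative and not part of the original setup:

import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.admin.NewTopic;

import java.util.Collections;
import java.util.Properties;

public class CreateFirstTopic {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.58.130:9092");
        try (AdminClient admin = AdminClient.create(props)) {
            // 1 partition, replication factor 3, same as the CLI command above
            // (throws TopicExistsException if the topic is already there)
            NewTopic first = new NewTopic("first", 1, (short) 3);
            admin.createTopics(Collections.singleton(first)).all().get();
            // list the topics on the cluster to confirm the creation
            System.out.println(admin.listTopics().names().get());
        }
    }
}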

2. Flink Environment

1. Add POM dependencies

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>1.18.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java</artifactId>
            <version>1.18.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients</artifactId>
            <version>1.18.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-files</artifactId>
            <version>1.18.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka</artifactId>
            <version>3.1.0-1.18</version>
        </dependency>
    </dependencies>

2. Under the resources directory, create a log4j.properties file and set the root log level to error

log4j.rootLogger=error, stdout,R
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%5L) : %m%n
log4j.appender.R=org.apache.log4j.RollingFileAppender
log4j.appender.R.File=../log/agent.log
log4j.appender.R.MaxFileSize=1024KB
log4j.appender.R.MaxBackupIndex=1
log4j.appender.R.layout=org.apache.log4j.PatternLayout
log4j.appender.R.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%6L) : %m%n

2. Flink Writes Data to Kafka (Producer)

1. Create a new Java class: FlinkKafkaProducer1

package cn.coreqi.flink.producer;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.flink.connector.kafka.sink.KafkaSink;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.kafka.clients.producer.ProducerConfig;

import java.util.ArrayList;
import java.util.Properties;

public class FlinkKafkaProducer1 {
    public static void main(String[] args) throws Exception {
        // 0 Initialize the Flink environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(3);
        // The exactly-once Kafka sink below only commits its transactions when a checkpoint completes,
        // so checkpointing must be enabled (the 5s interval here is an example value)
        env.enableCheckpointing(5000);
        // 1 Read data from a collection
        ArrayList<String> wordsList = new ArrayList<>();
        wordsList.add("hello");
        wordsList.add("world");

        DataStream<String> stream = env.fromCollection(wordsList);
        // 2 Kafka producer configuration
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.58.130:9092");

/*        // 3 Create the Kafka producer (legacy API)
        FlinkKafkaProducer<String> kafkaProducer = new FlinkKafkaProducer<>(
                "first",
                new SimpleStringSchema(),
                properties
        );

        // 4 Attach the producer to the Flink stream
        stream.addSink(kafkaProducer);*/

        // Before Flink 1.14, Kafka was written with FlinkKafkaProducer and read with FlinkKafkaConsumer;
        // both were deprecated in 1.14 and have since been removed in favor of KafkaSink and KafkaSource
        KafkaSink<String> kafkaSink = KafkaSink.<String>builder()
                // Kafka broker addresses and ports
                .setBootstrapServers("192.168.58.130:9092,192.168.58.131:9092,192.168.58.132:9092")
                // Record serializer: target topic and how to serialize the value
                .setRecordSerializer(
                        KafkaRecordSerializationSchema
                                .<String>builder()
                                .setTopic("first")
                                .setValueSerializationSchema(new SimpleStringSchema())
                                .build())
                // Delivery guarantee when writing to Kafka: exactly-once | at-least-once
                .setDeliveryGuarantee(DeliveryGuarantee.EXACTLY_ONCE)
                // Exactly-once requires a transactional id prefix
                .setTransactionalIdPrefix("coreqi-")
                // Exactly-once also requires a transaction timeout: larger than the checkpoint interval
                // and no larger than the broker's transaction.max.timeout.ms (15 minutes by default)
                .setProperty(ProducerConfig.TRANSACTION_TIMEOUT_CONFIG, 10 * 60 * 1000 + "")
                .build();

        stream.sinkTo(kafkaSink);

        // 5 Execute
        env.execute();
    }
}
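The sink above uses exactly-once semantics, which is why checkpointing and a transactional id prefix are required. For comparison, a minimal at-least-once variant needs neither; the sketch below is illustrative only (the class name, parallelism, and the choice to also serialize the element as the record key are assumptions, not part of the original example):

package cn.coreqi.flink.producer;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.flink.connector.kafka.sink.KafkaSink;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.util.Arrays;

public class FlinkKafkaProducerAtLeastOnce {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // At-least-once uses no Kafka transactions, so no transactional id prefix,
        // no transaction timeout and no checkpointing are strictly required
        KafkaSink<String> sink = KafkaSink.<String>builder()
                .setBootstrapServers("192.168.58.130:9092")
                .setRecordSerializer(
                        KafkaRecordSerializationSchema.<String>builder()
                                .setTopic("first")
                                // illustrative: serialize the element as the record key as well
                                .setKeySerializationSchema(new SimpleStringSchema())
                                .setValueSerializationSchema(new SimpleStringSchema())
                                .build())
                .setDeliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE)
                .build();

        env.fromCollection(Arrays.asList("hello", "world")).sinkTo(sink);
        env.execute();
    }
}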

2. Start a Kafka console consumer

/usr/kafka/kafka_2.13-3.6.1/bin/kafka-console-consumer.sh --bootstrap-server 192.168.58.130:9092 --topic first

3. Run the FlinkKafkaProducer1 program and watch the Kafka consumer console. Note that with EXACTLY_ONCE delivery the records are only committed to Kafka when a checkpoint completes, so a consumer using read_committed isolation will not see them before that.

3. Flink Reads Data from Kafka (Consumer)

1. Create a new Java class: FlinkKafkaConsumer1

package cn.coreqi.flink.consumer;

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.kafka.clients.consumer.ConsumerConfig;

import java.util.Properties;

public class FlinkKafkaConsumer1 {
    public static void main(String[] args) throws Exception {
        // 0 Initialize the Flink environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(3);
        // 1 Kafka consumer configuration
        Properties properties = new Properties();
        properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.58.130:9092");
        // 2 Create the Kafka consumer (legacy API)
/*        FlinkKafkaConsumer<String> kafkaConsumer = new FlinkKafkaConsumer<>(
                "first",
                new SimpleStringSchema(),
                properties
        );
        // 3 Attach the consumer to the Flink stream
        env.addSource(kafkaConsumer).print();*/

        // Before Flink 1.14, Kafka was read with FlinkKafkaConsumer and written with FlinkKafkaProducer;
        // both were deprecated in 1.14 and have since been removed in favor of KafkaSource and KafkaSink
        KafkaSource<String> kafkaSource =
                KafkaSource.<String>builder()
                        .setBootstrapServers("192.168.58.130:9092,192.168.58.131:9092,192.168.58.132:9092") // Kafka broker addresses and ports
                        .setGroupId("coreqi")   // consumer group id
                        .setTopics("first")   // topic(s) to consume
                        .setValueOnlyDeserializer(new SimpleStringSchema()) // deserializer for the record value
                        .setStartingOffsets(OffsetsInitializer.latest())    // start reading from the latest offsets
                        .build();

        env.fromSource(
                kafkaSource,
                WatermarkStrategy.noWatermarks(),
                "kafkaSource"
        ).print();

        // 4 Execute
        env.execute();
    }
}
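OffsetsInitializer.latest() only picks up records produced after the job starts. The sketch below shows, under stated assumptions, two other commonly used options: starting from committed group offsets (falling back to earliest) and reading only committed Kafka transactions, which pairs with the EXACTLY_ONCE producer above. The class name is illustrative:

package cn.coreqi.flink.consumer;

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.OffsetResetStrategy;

public class FlinkKafkaConsumerCommitted {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers("192.168.58.130:9092")
                .setGroupId("coreqi")
                .setTopics("first")
                .setValueOnlyDeserializer(new SimpleStringSchema())
                // resume from the group's committed offsets; fall back to earliest if none exist
                .setStartingOffsets(OffsetsInitializer.committedOffsets(OffsetResetStrategy.EARLIEST))
                // alternative: OffsetsInitializer.timestamp(...) to start from a given timestamp
                // only read records from committed Kafka transactions (matches an EXACTLY_ONCE sink)
                .setProperty(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed")
                .build();

        env.fromSource(source, WatermarkStrategy.noWatermarks(), "kafkaSource").print();
        env.execute();
    }
}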

2. Start the FlinkKafkaConsumer1 consumer (omitted)

3. Start a Kafka console producer

/usr/kafka/kafka_2.13-3.6.1/bin/kafka-console-producer.sh --bootstrap-server 192.168.58.130:9092 --topic first

4. Observe the data printed in the IDEA console
