1.環境準備
1.Kafka集羣環境準備
1.準備一個Kafka集羣環境並啓動
Kafka 3.6.1 集羣安裝與部署
2.創建first Topic
/usr/kafka/kafka_2.13-3.6.1/bin/kafka-topics.sh --bootstrap-server 192.168.58.130:9092 --create --partitions 1 --replication-factor 3 --topic first
2.Flink環境準備
1.新建Maven項目 flink-kafka 【略】
2.添加POM依賴
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>1.18.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java</artifactId>
<version>1.18.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients</artifactId>
<version>1.18.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-files</artifactId>
<version>1.18.1</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka</artifactId>
<version>3.1.0-1.18</version>
</dependency>
</dependencies>
3. 資源路徑resources下新建log4j.properties 文件,更改打印日誌的級別爲 error
log4j.rootLogger=error, stdout,R
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%5L) : %m%n
log4j.appender.R=org.apache.log4j.RollingFileAppender
log4j.appender.R.File=../log/agent.log
log4j.appender.R.MaxFileSize=1024KB
log4j.appender.R.MaxBackupIndex=1
log4j.appender.R.layout=org.apache.log4j.PatternLayout
log4j.appender.R.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%6L) : %m%n
4.新建包 cn.coreqi.flink 用於存放代碼【略】
2.Flink 生產者
1.新建java 類:FlinkKafkaProducer1
package cn.coreqi.flink.producer;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.flink.connector.kafka.sink.KafkaSink;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import java.util.ArrayList;
import java.util.Properties;
public class FlinkKafkaProducer1 {
    public static void main(String[] args) throws Exception {
        // 0 Initialize the Flink streaming environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(3);

        // EXACTLY_ONCE delivery relies on Kafka transactions that are committed only
        // when a checkpoint completes, so checkpointing MUST be enabled — without it
        // no record would ever be committed to Kafka. The interval must be shorter
        // than the producer transaction timeout configured below.
        env.enableCheckpointing(3000);

        // 1 Build a stream from an in-memory collection.
        ArrayList<String> wordsList = new ArrayList<>();
        wordsList.add("hello");
        wordsList.add("world");
        DataStream<String> stream = env.fromCollection(wordsList);

        // 2 Build the Kafka sink. Before Flink 1.14 the now-deprecated
        // FlinkKafkaProducer was used; KafkaSink is the supported replacement.
        KafkaSink<String> kafkaSink = KafkaSink.<String>builder()
                // Kafka broker addresses (host:port list).
                .setBootstrapServers("192.168.58.130:9092,192.168.58.131:9092,192.168.58.132:9092")
                // Target topic and value serialization.
                .setRecordSerializer(
                        KafkaRecordSerializationSchema.<String>builder()
                                .setTopic("first")
                                .setValueSerializationSchema(new SimpleStringSchema())
                                .build())
                // Delivery guarantee: exactly-once (requires checkpointing, see above)
                // or at-least-once.
                .setDeliveryGuarantee(DeliveryGuarantee.EXACTLY_ONCE)
                // Exactly-once requires a transactional-id prefix, unique per application.
                .setTransactionalIdPrefix("coreqi-")
                // Exactly-once also requires the transaction timeout to be larger than
                // the checkpoint interval and at most the broker-side maximum
                // (transaction.max.timeout.ms, 15 minutes by default).
                .setProperty(ProducerConfig.TRANSACTION_TIMEOUT_CONFIG, 10 * 60 * 1000 + "")
                .build();

        // 3 Attach the sink to the stream.
        stream.sinkTo(kafkaSink);

        // 4 Execute the job.
        env.execute();
    }
}
2.啓動 Kafka 消費者
/usr/kafka/kafka_2.13-3.6.1/bin/kafka-console-consumer.sh --bootstrap-server 192.168.58.130:9092 --topic first
3.執行 FlinkKafkaProducer1 程序,觀察 kafka 消費者控制檯情況
3.Flink 消費者
1.新建 java 類:FlinkKafkaConsumer1
package cn.coreqi.flink.consumer;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import java.util.Properties;
public class FlinkKafkaConsumer1 {
    public static void main(String[] args) throws Exception {
        // 0 Initialize the Flink streaming environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(3);

        // 1 Build the Kafka source. Before Flink 1.14 the now-deprecated
        // FlinkKafkaConsumer was used; KafkaSource is the supported replacement.
        KafkaSource<String> kafkaSource = KafkaSource.<String>builder()
                // Kafka broker addresses (host:port list).
                .setBootstrapServers("192.168.58.130:9092,192.168.58.131:9092,192.168.58.132:9092")
                // Consumer group id.
                .setGroupId("coreqi")
                // Topic(s) to consume.
                .setTopics("first")
                // Deserialize record values only; keys are ignored.
                .setValueOnlyDeserializer(new SimpleStringSchema())
                // Start from the latest offsets: records produced before this job
                // starts are NOT consumed.
                .setStartingOffsets(OffsetsInitializer.latest())
                .build();

        // 2 Attach the source and print each record. No event-time semantics are
        // needed here, so no watermarks are generated.
        env.fromSource(
                kafkaSource,
                WatermarkStrategy.noWatermarks(),
                "kafkaSource"
        ).print();

        // 3 Execute the job.
        env.execute();
    }
}
2.啓動 FlinkKafkaConsumer1 消費者【略】
3.啓動 kafka 生產者
/usr/kafka/kafka_2.13-3.6.1/bin/kafka-console-producer.sh --bootstrap-server 192.168.58.130:9092 --topic first
4.觀察 IDEA 控制檯數據打印