【kafka】Kafka的JavaAPI操作（Streams API開發生產者開發者）

創建maven工程並添加jar包

<dependencies>
    <!-- Kafka client library: https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients -->
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
        <version>1.0.0</version>
    </dependency>
    <!-- Kafka Streams library, same version as the client library above -->
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-streams</artifactId>
        <version>1.0.0</version>
    </dependency>
</dependencies>

<build>
    <plugins>
        <!-- Java compiler plugin: source/target level 1.8, UTF-8 source encoding -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.2</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
                <encoding>UTF-8</encoding>
            </configuration>
        </plugin>
    </plugins>
</build>

生產者代碼

生產者代碼

 // 1. Producer configuration for the Kafka cluster.
Properties props = new Properties();
// Addresses of the Kafka brokers.
props.put("bootstrap.servers", "node01:9092,node02:9092,node03:9092");
// Message acknowledgement mode.
props.put("acks", "all");
// Number of retries.
props.put("retries", 0);
// Size of a batch of records sent together.
props.put("batch.size", 16384);
// Delay before a batch is sent.
props.put("linger.ms", 1);
// Size of the buffer used for batching.
props.put("buffer.memory", 33554432);
// Serializers for the record key and value.
props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

// Optional: plug in a custom partitioner.
//props.put("partitioner.class", "Producer.ProducerPartition");

// 2. Create the producer. try-with-resources guarantees close() is called
//    even when send() throws, so the client is never leaked.
try (KafkaProducer<String, String> kafkaProducer = new KafkaProducer<>(props)) {
    for (int i = 0; i < 9; i++) {
        // 3. Send the data to the Kafka cluster through the producer.

        // Variant 1: no partition number and no key -> records are distributed round-robin.
        ProducerRecord<String, String> producerRecord =
                new ProducerRecord<>("18BD12", "bbbb___" + i);

        // Variant 2: no partition number but a key -> the key is hashed and the hash
        // modulo the partition count is the partition number.
        //ProducerRecord<String, String> producerRecord = new ProducerRecord<>("18BD12", "test", "aaaa___" + i);

        // Variant 3: explicit partition number -> every record goes to that partition.
        //ProducerRecord<String, String> producerRecord = new ProducerRecord<>("18BD12", 1, "test", "aaaa___" + i);

        // Variant 4: custom partitioning strategy (enable "partitioner.class" above).
        //ProducerRecord<String, String> producerRecord = new ProducerRecord<>("18BD12", "test", "aaaa___" + i);

        kafkaProducer.send(producerRecord);
    }
}
// 4. The producer has been closed by the try-with-resources statement above.

自定義分區條件

/**
 * Skeleton of a custom {@link Partitioner}.
 *
 * <p>As written, every record is routed to partition 0; replace the body of
 * {@link #partition} with real selection logic.
 */
public class ProducerPartition implements Partitioner {

    /** Receives the producer configuration; this skeleton ignores it. */
    @Override
    public void configure(Map<String, ?> configs) {
        // nothing to configure
    }

    /**
     * Chooses the target partition for a record.
     *
     * @return the target partition number; always 0 in this skeleton
     */
    @Override
    public int partition(String topic,
                         Object key,
                         byte[] keyBytes,
                         Object value,
                         byte[] valueBytes,
                         Cluster cluster) {
        // Custom partition-selection code goes here.
        final int targetPartition = 0;
        return targetPartition;
    }

    /** Nothing to release. */
    @Override
    public void close() {
        // no resources held
    }
}

生產者生產數據到kafka集羣，數據到分區的方式

1.沒有指定分區編號，沒有指定key時採用輪詢方式存儲數據

2.沒有指定分區編號，指定key時，數據分發策略爲對key求取hash值，這個值與分區數量取餘，餘數就是分區編號。

3.指定分區編號，所有數據輸入到指定的分區內

消費者代碼

自動提交offset

// 1. Build the consumer configuration.
Properties props = new Properties();
// Kafka brokers to connect to.
props.put("bootstrap.servers", "node01:9092,node02:9092,node03:9092");
// Consumer group id.
props.put("group.id", "test");
// Offsets are committed automatically by the consumer.
props.put("enable.auto.commit", "true");
// Interval between automatic offset commits.
props.put("auto.commit.interval.ms", "1000");
// Deserializers for the record key and value.
props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

// 2. Create the consumer. The poll loop below never ends normally, so
//    try-with-resources is what closes the consumer when an exception escapes.
try (KafkaConsumer<String, String> kafkaConsumer = new KafkaConsumer<>(props)) {
    // 3. Subscribe to the topic to read.
    kafkaConsumer.subscribe(Arrays.asList("18BD12"));

    while (true) {
        // 4. Fetch the next batch of records (timeout argument: 1000) and print them.
        ConsumerRecords<String, String> consumerRecords = kafkaConsumer.poll(1000);
        for (ConsumerRecord<String, String> consumerRecord : consumerRecords) {
            // One record: print its value and offset.
            System.out.println("數據是  "+consumerRecord.value() + "偏移量是 "+consumerRecord.offset());
        }
    }
}

手動提交offset

//1.配置文件
Properties props = new Properties();
//指定kafka服務器
props.put("bootstrap.servers", "node01:9092,node02:9092,node03:9092");
//消費組
rops.put("group.id", "test1");
//者自動提交offset值
props.put("enable.auto.commit", "false");
//自動提交的週期
//props.put("auto.commit.interval.ms",  "1000");
//kafka   key 和value的反序列化
props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

//2.消費者
KafkaConsumer<String, String> kafkaConsumer = new KafkaConsumer<String, String>(props);
        
//3.設置topic
kafkaConsumer.subscribe(Arrays.asList("18BD12"));

while (true){
    //4.拉取數據，並輸出
    //獲取到所有的數據返回記錄的時間
    ConsumerRecords<String, String> consumerRecords = kafkaConsumer.poll(1000);
    //遍歷所有數據獲取一條
    for (ConsumerRecord<String, String> consumerRecord : consumerRecords) {
         System.out.println(consumerRecord.value()  +"     "+consumerRecord.offset());
    }

    //手動提交offset
    kafkaConsumer.commitSync();
 }

消費完每個分區之後手動提交offset

// 1. Build the consumer configuration.
Properties props = new Properties();
// Kafka brokers to connect to.
props.put("bootstrap.servers", "node01:9092,node02:9092,node03:9092");
// Consumer group id. (The original line read "rops.put", which does not compile.)
props.put("group.id", "test1");
// Disable automatic offset commits; offsets are committed manually below.
props.put("enable.auto.commit", "false");
// Auto-commit interval is not needed when auto-commit is disabled.
//props.put("auto.commit.interval.ms",  "1000");
// Deserializers for the record key and value.
props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

// 2. Create the consumer. The poll loop below never ends normally, so
//    try-with-resources is what closes the consumer when an exception escapes.
try (KafkaConsumer<String, String> kafkaConsumer = new KafkaConsumer<>(props)) {
    // 3. Subscribe to the topic.
    kafkaConsumer.subscribe(Arrays.asList("18BD12"));

    while (true) {
        // 4. Fetch the next batch of records (timeout argument: 1000).
        ConsumerRecords<String, String> consumerRecords = kafkaConsumer.poll(1000);
        // Partitions that have records in this batch.
        Set<TopicPartition> partitions = consumerRecords.partitions();
        for (TopicPartition partition : partitions) {
            // All records of this batch that belong to the current partition.
            List<ConsumerRecord<String, String>> records = consumerRecords.records(partition);
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.value()+" "+record.partition());
            }

            // Commit only this partition, right after it has been processed.
            // A no-arg commitSync() here would also commit the partitions of this
            // batch that have not been processed yet. The committed offset is the
            // offset of the next record to read, hence lastOffset + 1.
            long lastOffset = records.get(records.size() - 1).offset();
            kafkaConsumer.commitSync(java.util.Collections.singletonMap(
                    partition,
                    new org.apache.kafka.clients.consumer.OffsetAndMetadata(lastOffset + 1)));
        }
    }
}

指定分區數據進行消費

// 1. Build the consumer configuration.
Properties props = new Properties();
// Kafka brokers to connect to.
props.put("bootstrap.servers", "node01:9092,node02:9092,node03:9092");
// Consumer group id. (The original line read "rops.put", which does not compile.)
props.put("group.id", "test1");
// Disable automatic offset commits; offsets are committed manually below.
props.put("enable.auto.commit", "false");
// Auto-commit interval is not needed when auto-commit is disabled.
//props.put("auto.commit.interval.ms",  "1000");
// Deserializers for the record key and value.
props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

// 2. Create the consumer. The poll loop below never ends normally, so
//    try-with-resources is what closes the consumer when an exception escapes.
try (KafkaConsumer<String, String> kafkaConsumer = new KafkaConsumer<>(props)) {
    // 3. Assign explicit partitions of the topic instead of subscribing.
    // NOTE(review): the original comments mentioned partitions 0 and 2, but the code
    // addressed partitions 0 and 1 and then used an undeclared "topicPartition1".
    // The partition numbers in the code are kept; the variable name now matches them.
    TopicPartition topicPartition0 = new TopicPartition("18BD12", 0);
    TopicPartition topicPartition1 = new TopicPartition("18BD12", 1);
    kafkaConsumer.assign(Arrays.asList(topicPartition0, topicPartition1));

    // Start reading partition 0 at offset 0 and partition 1 at offset 10.
    kafkaConsumer.seek(topicPartition0, 0);
    kafkaConsumer.seek(topicPartition1, 10);

    while (true) {
        // 4. Fetch the next batch of records (timeout argument: 1000).
        ConsumerRecords<String, String> consumerRecords = kafkaConsumer.poll(1000);
        // Partitions that have records in this batch.
        Set<TopicPartition> partitions = consumerRecords.partitions();
        for (TopicPartition partition : partitions) {
            // All records of this batch that belong to the current partition.
            List<ConsumerRecord<String, String>> records = consumerRecords.records(partition);
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.value()+"      "+record.partition());
            }

            // Commit only this partition, right after it has been processed.
            // A no-arg commitSync() here would also commit the partitions of this
            // batch that have not been processed yet. The committed offset is the
            // offset of the next record to read, hence lastOffset + 1.
            long lastOffset = records.get(records.size() - 1).offset();
            kafkaConsumer.commitSync(java.util.Collections.singletonMap(
                    partition,
                    new org.apache.kafka.clients.consumer.OffsetAndMetadata(lastOffset + 1)));
        }
    }
}

自動提交offset和手動提交offset的區別：

1.自動提交可能導致重複消費數據。

2.手動提交，消費一點提交一點。

Consumer消費數據的流程：

1.Consumer連接指定的Topic partition所在leader broker。

2.採用pull方式從kafkalogs中獲取消息。

3.高階API封裝了底層最原始的API，封裝後的更簡易(易使用)的API。隱藏Consumer與Broker細節。Offset保存在zookeeper。

4.低階API是沒有經過封裝的最底層的API。API非常靈活，使用難度較大，比較繁瑣。Offset保存在kafka的一個topic裏 “__consumer_offsets”

Streams API開發

Kafka實時計算：實時生產 –-> 實時傳遞 –->實時計算 –-> 實時存儲 –-> 實時展現

需求：使用StreamAPI獲取test01這個topic當中的數據，然後將數據全部轉爲大寫，寫入到test02這個topic當中去

創建topic test01 test02：

cd /export/servers/kafka_2.11-1.0.0/bin
# Create the input topic test01 (3 partitions, replication factor 2).
./kafka-topics.sh --create  --partitions 3 --replication-factor 2 --topic test01 --zookeeper node01:2181,node02:2181,node03:2181

cd /export/servers/kafka_2.11-1.0.0/bin
# Create the output topic test02 (the original command created test01 a second time).
./kafka-topics.sh --create  --partitions 3 --replication-factor 2 --topic test02 --zookeeper node01:2181,node02:2181,node03:2181

開發StreamAPI

Properties props = new Properties();
// Unique identifier of this Streams application.
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-application12");
// Kafka cluster to connect to.
// NOTE(review): "Node03" is capitalised differently from the other snippets - confirm the host name.
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "node01:9092,node02:9092,Node03:9092");
// Default serializer/deserializer classes for keys and values.
props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

// Builder that holds the processing logic.
StreamsBuilder streamsBuilder = new StreamsBuilder();
// stream(...): topic the data is read from
// to(...):     topic the data is written to
// The explicit <String, String> type arguments match the String serdes configured
// above, so the value is a String and no toString() call on Object is needed.
streamsBuilder.<String, String>stream("test01")
        .mapValues(line -> line.toUpperCase())
        .to("test02");

// Build the Topology object (the processing graph).
final Topology topology = streamsBuilder.build();

// Create the Kafka Streams instance.
KafkaStreams streams = new KafkaStreams(topology, props);

// Close the streams instance when the JVM shuts down; the original never closed it.
Runtime.getRuntime().addShutdownHook(new Thread(streams::close));

// Start the stream processing.
streams.start();

生產數據

node01執行以下命令，向test01這個topic當中生產數據

# Run the Kafka console producer against topic test01, using the three brokers as the broker list.
cd /export/servers/kafka_2.11-1.0.0/bin
./kafka-console-producer.sh --broker-list node01:9092,node02:9092,node03:9092 --topic test01

消費數據

node02執行以下命令消費test02這個topic當中的數據

cd /export/servers/kafka_2.11-1.0.0/bin
# Read topic test02 from the beginning. --bootstrap-server replaces the deprecated
# --zookeeper option of the console consumer and connects to the brokers directly.
./kafka-console-consumer.sh --from-beginning  --topic test02 --bootstrap-server node01:9092,node02:9092,node03:9092

【kafka】Kafka的JavaAPI操作（Streams API開發生產者開發者）

創建maven工程並添加jar包

生產者代碼

生產者代碼

自定義分區條件

生產者生產數據到kafka集羣，數據到分區的方式

消費者代碼

自動提交offset

手動提交offset

消費完每個分區之後手動提交offset

指定分區數據進行消費

自動提交offset和手動提交offset的區別：

Consumer消費數據的流程：

Streams API開發

創建topic test01 test02：

開發StreamAPI

生產數據

消費數據

Power Automate Desktop 安裝完，登錄後老是提示one driver 錯誤

再談23種設計模式（3）：行爲型模式（學習筆記）

微前端學習筆記(4):從微前端到微模塊之EMP與hel-micro方案探索

微前端學習筆記（1）：微前端總體架構概述，從微服務發微

985 碩士程序員，空窗 4 個月沒有 Offer！

一文搞懂 Spring 循環依賴

賽博鬥地主——使用大語言模型扮演Agent智能體玩牌類遊戲。

VScode右鍵打開(添加到右鍵)

記一次 .NET某工控視覺自動化系統卡死分析

WindowsServer--SQL Server搭建主從同步實現讀寫分離 - 事務性分發

【Kylin】Kylin安裝與部署

【Kylin】Kylin的介紹

【Yarn】配置容量調度器

【HadoopHA】HadoopHA集羣的部署（YarnHA NamenodeHA）

【Yarn】Yarn運行流程提升必看

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結

【kafka】Kafka的JavaAPI操作（Streams API開發 生產者 開發者）

創建maven工程並添加jar包

生產者代碼

生產者代碼

自定義分區條件

生產者生產數據到kafka集羣，數據到分區的方式

消費者代碼

自動提交offset

手動提交offset

消費完每個分區之後手動提交offset

指定分區數據進行消費

自動提交offset和手動提交offset的區別：

Consumer消費數據的流程：

Streams API開發

創建topic test01 test02：

開發StreamAPI

生產數據

消費數據

【kafka】Kafka的JavaAPI操作（Streams API開發生產者開發者）