kafka 自定義存儲offset 到mysql中

kafka0.9版本之前,offset存儲在zookeeper,0.9版本以及之後,默認offset存儲在kafka的一個內置的topic中。除此之外,kafka還可以選擇自定義存儲offset。

offset的維護是相當繁瑣的,因爲需要考慮到消費者的Rebalance。

當有新的消費者加入消費者組、已有的消費者退出消費者組,或者所訂閱主題的分區發生變化時,就會觸發分區的重新分配,這個重新分配的過程叫做Rebalance。

消費者發生Rebalance之後,每個消費者消費的分區就會發生變化。因此消費者要首先獲取到自己被重新分配到的分區,並且定位到每個分區最近提交的offset位置繼續消費。

要實現自定義存儲offset,需要藉助ConsumerRebalanceListener,其中提交和獲取offset的方法,需要根據所選的offset存儲系統自行實現:

1:定義producer

/**
 * Demo producer that publishes sample tab-separated records to the
 * `mysql_store_offset` topic so that the offset-storing consumer has data to read.
 *
 * The producer is created when this object is first initialised and is closed
 * by [[main]] once all records have been handed to it.
 */
object KafkaProducerTest {
  val props = new Properties()
  //    Kafka cluster broker list
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "cm01:9092,cm02:9092,cm03:9092")
  //    acks acknowledgement mode
  props.put(ProducerConfig.ACKS_CONFIG, "all")
  //    number of retries
  props.put(ProducerConfig.RETRIES_CONFIG, "1")
  //    batch size
  props.put(ProducerConfig.BATCH_SIZE_CONFIG, "16384")
  //    linger time
  props.put(ProducerConfig.LINGER_MS_CONFIG, "1")
  //    RecordAccumulator buffer size
  props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, "33554432")
  //    key serializer
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
  //    value serializer
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])

  private val producer = new KafkaProducer[String, String](props)

  /**
   * Builds a randomised record key from `input`.
   *
   * A random integer in [0, 1000) is appended to `input`, the result is hashed
   * with MD5, and the first three hex digits of that hash are used as a prefix.
   *
   * @param input base text of the key
   * @return three hex digits of the MD5 hash, followed by `input` and the random number
   */
  def generateHash(input: String): String = {
    val digest: MessageDigest = MessageDigest.getInstance("MD5")
    val random: Int = Random.nextInt(1000)
    digest.update((input + random).getBytes())
    val bytes: Array[Byte] = digest.digest()
    // Signum 1 makes BigInteger treat the hash bytes as an unsigned magnitude.
    val bi = new BigInteger(1, bytes)
    val string: String = bi.toString(16)
    string.substring(0, 3) + input + random
  }

  /**
   * Sends one sample record to the `mysql_store_offset` topic with a randomised key.
   *
   * The completion callback prints topic/partition/offset when the send
   * succeeded and prints the stack trace when it failed.
   *
   * @return the future returned by `producer.send`
   */
  def sendData = {
    val topic: String = "mysql_store_offset"

    producer.send(new ProducerRecord[String, String](
      topic,
      generateHash("mysql_store_offset"),
      s"${Random.nextInt(1000)}\t金鎖家庭財產綜合險(家順險)\t1\t金鎖家庭財產綜合險(家順險)\t213\t自住型家財險\t10\t家財保險\t44\t人保財險\t23:50.0"
    ), (metadata: RecordMetadata, exception: Exception) => {
      // A null exception means the send succeeded. The original test was
      // inverted and dereferenced the null exception on every successful send.
      if (exception == null) {
        println(
          s"""
             |----------------------------
             |topic   partition   offset
             |${metadata.topic()} ${metadata.partition()} ${metadata.offset()}
             |----------------------------
             |""".stripMargin)
      }
      else {
        exception.printStackTrace()
      }
    })

  }

  /** Sends 101 sample records and then closes the producer. */
  def main(args: Array[String]): Unit = {
    try {
      for (_ <- 0 to 100) {
        sendData
      }
    } finally {
      // Close even if a send throws, so the producer's resources are released.
      producer.close()
    }
  }
}

2:傳統的mysql操作類

/**
 * Plain-JDBC helpers for reading and writing consumer offsets in the MySQL
 * `offset` table.
 *
 * Every call opens its own connection and closes it before returning, even
 * when the statement fails.
 */
object DBUtils {
  private val URL: String = "jdbc:mysql://127.0.0.1:3306/mydb?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC"
  private val DRIVER: String = "com.mysql.jdbc.Driver"
  private val USERNAME: String = "root"
  private val PASSWORD: String = "123456"

  /**
   * Opens a connection, prepares `sql`, runs `body` with the statement, and
   * always closes the statement and the connection afterwards.
   */
  private def withStatement[A](sql: String)(body: PreparedStatement => A): A = {
    Class.forName(DRIVER)
    val conn: Connection = DriverManager.getConnection(URL, USERNAME, PASSWORD)
    try {
      val ps: PreparedStatement = conn.prepareStatement(sql)
      try body(ps)
      finally ps.close()
    } finally {
      conn.close()
    }
  }

  /**
   * Looks up the stored offset for a consumer group, topic and partition.
   *
   * @param consumer_group consumer group
   * @param sub_topic      topic
   * @param partition_id   partition
   * @return the stored offset, or 0 when no matching row exists
   */
  def query(consumer_group: String, sub_topic: String, partition_id: Int): Long = {
    val sql: String = "select sub_topic_partition_offset from offset where consumer_group=? and sub_topic=? and sub_topic_partition_id=?"
    withStatement(sql) { ps =>
      ps.setString(1, consumer_group)
      ps.setString(2, sub_topic)
      ps.setInt(3, partition_id)
      val set: ResultSet = ps.executeQuery()
      try {
        var offset: Long = 0
        // If several rows match, the last one wins (same as the original loop).
        while (set.next()) {
          offset = set.getLong("sub_topic_partition_offset")
        }
        offset
      } finally {
        set.close()
      }
    }
  }

  /**
   * Writes the offset for a consumer group, topic and partition using
   * `replace into`, binding the five fields of `offset` positionally.
   *
   * @param offset [[Offset]] row to store
   */
  def update(offset: Offset): Unit = {
    val sql: String = "replace into offset values(?,?,?,?,?)"
    withStatement(sql) { ps =>
      ps.setString(1, offset.consumer_group)
      ps.setString(2, offset.sub_topic)
      ps.setInt(3, offset.sub_topic_partition_id)
      ps.setLong(4, offset.sub_topic_partition_offset)
      ps.setString(5, offset.timestamp)
      ps.executeUpdate()
      ()
    }
  }

  /** Manual smoke test: writes a single dummy offset row. */
  def main(args: Array[String]): Unit = {
    // NOTE(review): query/update above use DriverManager directly, so this DBs
    // setup/teardown does not appear to be needed by them — confirm before removing.
    DBs.setupAll()
    //    query("", "", 0).foreach(println)
    update(Offset("test", "test", 0, 0, System.currentTimeMillis().toString))
    DBs.close()
  }
}

3:定義consumer

/**
 * Demo consumer that keeps its offsets in MySQL (via [[DBUtils]]) instead of
 * committing them to Kafka.
 *
 * The value stored for a partition is always the offset of the NEXT record to
 * read, so it can be passed straight to `consumer.seek` after a rebalance.
 */
object KafkaConsumerTest {
  private val group: String = "mysql_offset"
  private val topic: String = "mysql_store_offset"
  private val props = new Properties()
  //    Kafka cluster
  props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "cm01:9092,cm02:9092,cm03:9092")
  //    consumer group: consumers sharing the same group.id belong to one group
  props.put(ConsumerConfig.GROUP_ID_CONFIG, group)
  //    auto-commit is disabled because offsets are stored in MySQL
  props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false")
  //    key deserializer
  props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
  //    value deserializer
  props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])

  lazy val consumer = new KafkaConsumer[String, String](props)

  /**
   * Subscribes to the topic and polls forever, printing each record and
   * persisting the next offset to read for every partition after each poll.
   * The consumer is closed if the loop exits with an exception.
   */
  def executor: Unit = {
    //    subscribe to the topic
    consumer.subscribe(util.Arrays.asList(topic),
      new ConsumerRebalanceListener {
        //        before a rebalance, save the current position of every revoked partition
        override def onPartitionsRevoked(partitions: util.Collection[TopicPartition]): Unit = {
          partitions.forEach(partition => {
            //            partition id
            val sub_topic_partition_id: Int = partition.partition()
            //            position = offset of the next record to be fetched
            val sub_topic_partition_offset: Long = consumer.position(partition)
            DBUtils.update(Offset(group, topic, sub_topic_partition_id, sub_topic_partition_offset, TimeUtils.tranTimeToString(System.currentTimeMillis().toString)))
          })
        }

        //        after a rebalance, resume each assigned partition from its stored offset
        override def onPartitionsAssigned(partitions: util.Collection[TopicPartition]): Unit = {
          partitions.forEach(partition => {
            val sub_topic_partition_id: Int = partition.partition()
            val offset = DBUtils.query(group, topic, sub_topic_partition_id)
            consumer.seek(partition, offset)
          })
        }
      })
    try {
      while (true) {
        val records: ConsumerRecords[String, String] = consumer.poll(100)
        // Only the newest offset per partition matters: each write replaces the
        // previous row, so keep the last one seen and write once per partition.
        val latest = new util.HashMap[Int, Offset]()
        records.forEach(record => {
          // Store offset + 1 (the next record to read). Storing the consumed
          // offset itself would re-deliver that record after seek().
          latest.put(record.partition(), Offset(group, topic, record.partition(), record.offset() + 1, TimeUtils.tranTimeToString(record.timestamp().toString)))
          println(
            s"""
               |---------------------------------------------------------------
               |group       topic       partition       offset        timestamp
               |$group  $topic  ${record.partition()} ${record.offset()}  ${record.timestamp()}
               |---------------------------------------------------------------
               |""".stripMargin)
        })
        latest.values().forEach(offset => {
          DBUtils.update(offset)
        })
      }
    } finally {
      // Release the consumer if polling or the DB write fails.
      consumer.close()
    }
  }

  /** Entry point: runs the polling loop. */
  def main(args: Array[String]): Unit = {
    executor
  }
}

4:mysql數據截圖

使用replace的話,必須有相應的主鍵作爲限制,不然起不到我們想要的目的

replace:根據由三個欄位(consumer_group、sub_topic、sub_topic_partition_id)組成的聯合主鍵,先查詢是否已存在主鍵值相同的記錄;不存在的話直接insert,存在的話就先刪除舊記錄再插入新記錄(效果上相當於覆蓋)

根據需求,如果需要加入consumer_id的話,那就同樣可以設置爲4號主鍵,動態的數據不能設置爲主鍵,動手嘗試一下就知道其中的奧妙了

不懂的地方都可以私信我,第一時間回覆你(刷單,網絡兼職,招聘IT講師,獵頭之類的不要私信我)

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章