Before version 0.9, Kafka stored offsets in ZooKeeper; from 0.9 onward, offsets are stored by default in an internal Kafka topic (__consumer_offsets). Beyond that, Kafka also allows you to store offsets in a system of your own choosing.
Maintaining offsets yourself is fairly involved, because you have to account for consumer rebalances.
Whenever a new consumer joins the consumer group, an existing consumer leaves the group, or the partitions of the subscribed topics change, the partitions are redistributed among the consumers; this redistribution is called a rebalance.
After a rebalance, the partitions each consumer owns will have changed. Each consumer therefore has to first find out which partitions it has been assigned, then seek to the most recently committed offset of each partition and continue consuming from there.
Implementing custom offset storage relies on ConsumerRebalanceListener; the logic that commits and fetches offsets has to be written yourself, against whichever storage system you choose:
1: Define the producer
import java.math.BigInteger
import java.security.MessageDigest
import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.StringSerializer

import scala.util.Random

object KafkaProducerTest {
  val props = new Properties()
  // Kafka cluster broker list
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "cm01:9092,cm02:9092,cm03:9092")
  // acks acknowledgement mode: "all" waits for the full ISR
  props.put(ProducerConfig.ACKS_CONFIG, "all")
  // number of retries
  props.put(ProducerConfig.RETRIES_CONFIG, "1")
  // batch size in bytes
  props.put(ProducerConfig.BATCH_SIZE_CONFIG, "16384")
  // linger time in ms before a batch is sent
  props.put(ProducerConfig.LINGER_MS_CONFIG, "1")
  // RecordAccumulator buffer size in bytes
  props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, "33554432")
  // key serializer
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
  // value serializer
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])

  private val producer = new KafkaProducer[String, String](props)

  // prefix the input with the first 3 hex chars of an MD5 hash so keys spread across partitions
  def generateHash(input: String): String = {
    val digest: MessageDigest = MessageDigest.getInstance("MD5")
    val random: Int = Random.nextInt(1000)
    digest.update((input + random).getBytes())
    val bytes: Array[Byte] = digest.digest()
    val bi = new BigInteger(1, bytes)
    val string: String = bi.toString(16)
    string.substring(0, 3) + input + random
  }

  def sendData(): Unit = {
    val topic: String = "mysql_store_offset"
    producer.send(new ProducerRecord[String, String](
      topic,
      generateHash("mysql_store_offset"),
      s"${Random.nextInt(1000)}\t金鎖家庭財產綜合險(家順險)\t1\t金鎖家庭財產綜合險(家順險)\t213\t自住型家財險\t10\t家財保險\t44\t人保財險\t23:50.0"
    ), (metadata: RecordMetadata, exception: Exception) => {
      // on success the exception is null and the metadata is populated
      if (exception == null) {
        println(
          s"""
             |----------------------------
             |topic partition offset
             |${metadata.topic()} ${metadata.partition()} ${metadata.offset()}
             |----------------------------
             |""".stripMargin)
      } else {
        exception.printStackTrace()
      }
    })
  }

  def main(args: Array[String]): Unit = {
    for (i <- 0 to 100) {
      sendData()
    }
    producer.close()
  }
}
2: A conventional JDBC-based MySQL helper
import java.sql.{Connection, DriverManager, PreparedStatement, ResultSet}

object DBUtils {
  private val URL: String = "jdbc:mysql://127.0.0.1:3306/mydb?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC"
  private val DRIVER: String = "com.mysql.jdbc.Driver"
  private val USERNAME: String = "root"
  private val PASSWORD: String = "123456"

  /**
   * Fetch the offset stored for a given consumer group, topic and partition.
   *
   * @param consumer_group consumer group
   * @param sub_topic      topic
   * @param partition_id   partition
   * @return offset
   */
  def query(consumer_group: String, sub_topic: String, partition_id: Int): Long = {
    Class.forName(DRIVER)
    val conn: Connection = DriverManager.getConnection(URL, USERNAME, PASSWORD)
    var offset: Long = 0
    val sql: String = "select sub_topic_partition_offset from offset where consumer_group=? and sub_topic=? and sub_topic_partition_id=?"
    val ps: PreparedStatement = conn.prepareStatement(sql)
    ps.setString(1, consumer_group)
    ps.setString(2, sub_topic)
    ps.setInt(3, partition_id)
    val set: ResultSet = ps.executeQuery()
    while (set.next()) {
      offset = set.getLong("sub_topic_partition_offset")
    }
    set.close()
    ps.close()
    conn.close()
    offset
  }

  /**
   * Upsert the offset for a given consumer group, topic and partition.
   *
   * @param offset [[Offset]]
   */
  def update(offset: Offset): Unit = {
    Class.forName(DRIVER)
    val conn: Connection = DriverManager.getConnection(URL, USERNAME, PASSWORD)
    val sql: String = "replace into offset values(?,?,?,?,?)"
    val ps: PreparedStatement = conn.prepareStatement(sql)
    ps.setString(1, offset.consumer_group)
    ps.setString(2, offset.sub_topic)
    ps.setInt(3, offset.sub_topic_partition_id)
    ps.setLong(4, offset.sub_topic_partition_offset)
    ps.setString(5, offset.timestamp)
    ps.executeUpdate()
    ps.close()
    conn.close()
  }

  def main(args: Array[String]): Unit = {
    // quick smoke test: write one row, then read it back
    update(Offset("test", "test", 0, 0, System.currentTimeMillis().toString))
    println(query("test", "test", 0))
  }
}
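The snippets above and below also reference an Offset case class and a TimeUtils helper that are not shown. A minimal sketch, inferred purely from how they are called (the field names mirror the offset table columns, and tranTimeToString is assumed to format a millisecond-epoch string):

import java.text.SimpleDateFormat
import java.util.Date

// One row of the MySQL offset table; field names match the columns used in DBUtils.
case class Offset(consumer_group: String,
                  sub_topic: String,
                  sub_topic_partition_id: Int,
                  sub_topic_partition_offset: Long,
                  timestamp: String)

object TimeUtils {
  // Assumed behaviour: turn a millisecond-epoch string into a readable datetime string.
  def tranTimeToString(millis: String): String = {
    val fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    fmt.format(new Date(millis.toLong))
  }
}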
3: Define the consumer
import java.time.Duration
import java.util
import java.util.Properties

import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRebalanceListener, ConsumerRecords, KafkaConsumer}
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer

object KafkaConsumerTest {
  private val group: String = "mysql_offset"
  private val topic: String = "mysql_store_offset"
  private val props = new Properties()
  // Kafka cluster
  props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "cm01:9092,cm02:9092,cm03:9092")
  // consumer group: every consumer with the same group.id belongs to the same group
  props.put(ConsumerConfig.GROUP_ID_CONFIG, group)
  // disable automatic offset commits; offsets are managed in MySQL instead
  props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false")
  // key deserializer
  props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])
  // value deserializer
  props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, classOf[StringDeserializer])

  lazy val consumer = new KafkaConsumer[String, String](props)

  def executor(): Unit = {
    // subscribe to the topic with a rebalance listener attached
    consumer.subscribe(util.Arrays.asList(topic),
      new ConsumerRebalanceListener {
        // before the rebalance, persist the current position of each owned partition
        override def onPartitionsRevoked(partitions: util.Collection[TopicPartition]): Unit = {
          partitions.forEach(partition => {
            // partition id
            val sub_topic_partition_id: Int = partition.partition()
            // current consume position of that partition (the next offset to read)
            val sub_topic_partition_offset: Long = consumer.position(partition)
            DBUtils.update(Offset(group, topic, sub_topic_partition_id, sub_topic_partition_offset,
              TimeUtils.tranTimeToString(System.currentTimeMillis().toString)))
          })
        }

        // after the rebalance, look up the stored positions and resume from them
        override def onPartitionsAssigned(partitions: util.Collection[TopicPartition]): Unit = {
          partitions.forEach(partition => {
            val sub_topic_partition_id: Int = partition.partition()
            val offset: Long = DBUtils.query(group, topic, sub_topic_partition_id)
            consumer.seek(partition, offset)
          })
        }
      })

    while (true) {
      val records: ConsumerRecords[String, String] = consumer.poll(Duration.ofMillis(100))
      val offsets = new util.ArrayList[Offset]()
      records.forEach(record => {
        // store the next offset to consume (last record offset + 1), matching what
        // consumer.position() returns and what seek() expects; storing record.offset()
        // itself would re-consume one record per partition after each rebalance
        offsets.add(Offset(group, topic, record.partition(), record.offset() + 1,
          TimeUtils.tranTimeToString(record.timestamp().toString)))
        println(
          s"""
             |---------------------------------------------------------------
             |group topic partition offset timestamp
             |$group $topic ${record.partition()} ${record.offset()} ${record.timestamp()}
             |---------------------------------------------------------------
             |""".stripMargin)
      })
      offsets.forEach(offset => {
        DBUtils.update(offset)
      })
      offsets.clear()
    }
  }

  def main(args: Array[String]): Unit = {
    executor()
  }
}
4: The resulting MySQL data
For REPLACE to behave as intended, the table must have an appropriate primary key; without one it will not achieve what we want.
REPLACE works against the three primary-key columns: it first checks whether a row with those three key values exists; if not, it simply inserts, and if so, it overwrites the existing row.
If the requirements call for a consumer_id as well, it can likewise be added as a fourth key column. Columns whose values change on every write, such as the offset and timestamp, must not be part of the primary key; try it yourself and you will see why. A sketch of a matching table definition follows.
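The original shows the table only as a screenshot, so the schema below is reconstructed from the column names used in DBUtils; the column types and the CreateOffsetTable helper are assumptions for illustration, not the author's original DDL:

import java.sql.DriverManager

object CreateOffsetTable {
  def main(args: Array[String]): Unit = {
    // Assumed schema: column names mirror DBUtils; the types are guesses.
    // The composite primary key is what turns REPLACE INTO into an upsert.
    val ddl =
      """create table if not exists offset (
        |  consumer_group             varchar(128) not null,
        |  sub_topic                  varchar(128) not null,
        |  sub_topic_partition_id     int          not null,
        |  sub_topic_partition_offset bigint       not null,
        |  timestamp                  varchar(64)  not null,
        |  primary key (consumer_group, sub_topic, sub_topic_partition_id)
        |)""".stripMargin
    val conn = DriverManager.getConnection(
      "jdbc:mysql://127.0.0.1:3306/mydb?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC",
      "root", "123456")
    try conn.createStatement().execute(ddl)
    finally conn.close()
  }
}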