Spark Streaming: consuming Kafka data and saving the offsets to Redis, a simple walkthrough
First, a Kafka producer to generate some test data:
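The examples below use Spark's kafka-0-10 direct stream integration and the Jedis client. A minimal build.sbt sketch of the dependencies (the versions here are assumptions; align them with your own Spark and Kafka cluster):

// build.sbt -- versions are assumptions, match your cluster
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-streaming"            % "2.4.8",
  "org.apache.spark" %% "spark-streaming-kafka-0-10" % "2.4.8",
  "org.apache.kafka"  % "kafka-clients"              % "2.0.0",
  "redis.clients"     % "jedis"                      % "3.7.0"
)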
import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

import scala.util.Random

object Kafkaproducer {
  def main(args: Array[String]): Unit = {
    // Kafka configuration
    val props: Properties = new Properties()
    // broker list
    props.setProperty("bootstrap.servers", "192.168.15.134:9092,192.168.15.138:9092,192.168.15.139:9092")
    // key and value serializers
    props.setProperty("key.serializer", classOf[StringSerializer].getName)
    props.setProperty("value.serializer", classOf[StringSerializer].getName)
    val producer: KafkaProducer[String, String] = new KafkaProducer[String, String](props)
    val topic = "topic3"
    for (i <- 0 to 1000) {
      // generate a random one-letter word from 'a' to 'f'
      val word: String = String.valueOf((Random.nextInt(6) + 'a').toChar)
      Thread.sleep(500)
      val record: ProducerRecord[String, String] = new ProducerRecord[String, String](topic, word)
      producer.send(record)
    }
    println("finished writing")
    producer.close()
  }
}
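Note that the producer above only prints "finished writing" after the loop and never checks whether the brokers actually acknowledged anything. If you want per-record confirmation, KafkaProducer.send also accepts a Callback; a minimal sketch:

import org.apache.kafka.clients.producer.{Callback, RecordMetadata}

// send with a callback so failures surface instead of being silently dropped
producer.send(record, new Callback {
  override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = {
    if (exception != null) exception.printStackTrace()
    else println(s"acked: ${metadata.topic()}-${metadata.partition()}@${metadata.offset()}")
  }
})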
The Spark Streaming job then consumes the topic and records its offsets in Redis:
import java.util

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.{Seconds, StreamingContext}
import redis.clients.jedis.Jedis

import scala.collection.JavaConverters._
import scala.collection.mutable

object streamingAndKafka {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf()
      .setAppName(this.getClass.getSimpleName)
      .setMaster("local[*]")
    val groupId = "streaming1"
    val topics = Array("topic3")
    // Kafka consumer configuration
    val kafkaParams = mutable.HashMap[String, Object](
      "bootstrap.servers" -> "192.168.15.134:9092,192.168.15.138:9092,192.168.15.139:9092",
      // key and value deserializers
      "key.deserializer" -> classOf[StringDeserializer].getName,
      "value.deserializer" -> classOf[StringDeserializer].getName,
      "group.id" -> groupId,
      "auto.offset.reset" -> "earliest",
      // we manage offsets ourselves in Redis, so disable auto-commit
      "enable.auto.commit" -> "false"
    )
    val ssc: StreamingContext = new StreamingContext(conf, Seconds(2))
    // restore the offsets saved in Redis (hash key layout: "<topic>-<groupId>")
    val offsetsMap = mutable.HashMap[TopicPartition, Long]()
    val jedis: Jedis = new Jedis("192.168.15.134", 6379)
    val partandOffset: util.Map[String, String] = jedis.hgetAll(topics(0) + "-" + groupId)
    for ((part, offset) <- partandOffset.asScala) {
      offsetsMap += (new TopicPartition(topics(0), part.toInt) -> offset.toLong)
    }
    jedis.close()
    // subscribe, resuming from the restored offsets
    // (an empty map falls back to auto.offset.reset)
    val kafkaDs: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream(ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](topics, kafkaParams, offsetsMap))
    kafkaDs.foreachRDD(rdd => {
      // skip empty batches
      if (!rdd.isEmpty()) {
        // offset range consumed by each partition in this batch
        val ranges: Array[OffsetRange] = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
        ranges.foreach(println)
        // business logic: here we just print each record
        rdd.foreach(t => println(s"partition=${t.partition()} offset=${t.offset()} value=${t.value()}"))
        // after the batch's work is done, write the end offsets back to Redis
        // on the driver, as hash fields: key "<topic>-<groupId>", field partition, value untilOffset
        val jedis: Jedis = new Jedis("192.168.15.134", 6379)
        for (t <- ranges) {
          jedis.hset(t.topic + "-" + groupId, t.partition.toString, t.untilOffset.toString)
        }
        jedis.close()
      }
    })
    ssc.start()
    ssc.awaitTermination()
  }
}
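To verify that the offsets are actually being persisted, you can read the hash back with the same key layout the job writes ("<topic>-<groupId>"). A small stand-alone check, assuming the same Redis host; the OffsetCheck object name is just for illustration:

import redis.clients.jedis.Jedis

import scala.collection.JavaConverters._

object OffsetCheck {
  def main(args: Array[String]): Unit = {
    val jedis = new Jedis("192.168.15.134", 6379)
    // key layout written by the streaming job: "<topic>-<groupId>"
    val saved = jedis.hgetAll("topic3-streaming1")
    for ((partition, offset) <- saved.asScala) {
      println(s"partition $partition -> offset $offset")
    }
    jedis.close()
  }
}

Each field should advance batch by batch while the producer is running; restarting the streaming job should pick up exactly where these values left off.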