Spark + Kafka + Redis: a simple example

A simple example of Spark Streaming consuming data from Kafka and storing the consumed offsets in Redis.

First, a Kafka producer that generates some test data:

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

import scala.util.Random

object Kafkaproducer {
  def main(args: Array[String]): Unit = {

    // Kafka producer configuration
    val props: Properties = new Properties()
    // broker list
    props.setProperty("bootstrap.servers", "192.168.15.134:9092,192.168.15.138:9092,192.168.15.139:9092")
    // key and value serializers
    props.setProperty("key.serializer", classOf[StringSerializer].getName)
    props.setProperty("value.serializer", classOf[StringSerializer].getName)

    // key - value
    val producer: KafkaProducer[String, String] = new KafkaProducer[String, String](props)

    for (i <- 0 to 1000) {
      val topic = "topic3"
      // generate a random one-letter word from a to f
      val word: String = String.valueOf((Random.nextInt(6) + 'a').toChar)
      Thread.sleep(500)
      val record: ProducerRecord[String, String] = new ProducerRecord[String, String](topic, word)
      producer.send(record)
    }
    println("finished sending")
    producer.close()
  }
}
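Note that the final println only tells us the loop has finished; KafkaProducer.send is asynchronous, so individual records can still fail afterwards. Below is a minimal sketch of sending with a per-record callback that reports the assigned partition and offset. It assumes the same topic and broker list as above; the object name KafkaProducerWithCallback is just for illustration.

import java.util.Properties

import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata}
import org.apache.kafka.common.serialization.StringSerializer

object KafkaProducerWithCallback {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.setProperty("bootstrap.servers", "192.168.15.134:9092,192.168.15.138:9092,192.168.15.139:9092")
    props.setProperty("key.serializer", classOf[StringSerializer].getName)
    props.setProperty("value.serializer", classOf[StringSerializer].getName)

    val producer = new KafkaProducer[String, String](props)
    val record = new ProducerRecord[String, String]("topic3", "a")

    // send() returns immediately; the callback runs once the broker acknowledges (or rejects) the record
    producer.send(record, new Callback {
      override def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = {
        if (exception != null) exception.printStackTrace()
        else println(s"written to partition ${metadata.partition()} at offset ${metadata.offset()}")
      }
    })

    // close() flushes pending records and waits for outstanding callbacks
    producer.close()
  }
}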

The Spark Streaming job then consumes the topic and records its offsets in Redis:

import java.util

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, HasOffsetRanges, KafkaUtils, LocationStrategies, OffsetRange}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import redis.clients.jedis.Jedis

import scala.collection.mutable

object streamingAndKafka {
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf()
      .setAppName(this.getClass.getSimpleName)
      .setMaster("local[*]")

    val groupId = "streaming1"
    val topics = Array("topic3")
    // Kafka consumer configuration
    val kafkaParams = mutable.HashMap[String, Object](
      "bootstrap.servers" -> "192.168.15.134:9092,192.168.15.138:9092,192.168.15.139:9092",
      // key and value deserializers
      "key.deserializer" -> classOf[StringDeserializer].getName,
      "value.deserializer" -> classOf[StringDeserializer].getName,
      "group.id" -> groupId,
      // start from the earliest offset when no stored offset exists
      "auto.offset.reset" -> "earliest",
      // offsets are managed manually in Redis, so disable auto commit
      "enable.auto.commit" -> "false"
    )

    val ssc: StreamingContext = new StreamingContext(conf, Seconds(2))

    // read previously saved offsets from Redis
    // hash key: "<topic>-<groupId>", field: partition, value: offset
    val offsetsMap = mutable.HashMap[TopicPartition, Long]()
    val jedis: Jedis = new Jedis("192.168.15.134", 6379)
    val partandOffset: util.Map[String, String] = jedis.hgetAll(topics(0) + "-" + groupId)
    jedis.close()
    import scala.collection.JavaConversions._
    for (part <- partandOffset) {
      offsetsMap += (new TopicPartition(topics(0), part._1.toInt) -> part._2.toLong)
    }

    // subscribe to the topic, resuming from the offsets read out of Redis
    val kafkaDs: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream(ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](topics, kafkaParams, offsetsMap))

    kafkaDs.foreachRDD(rdd => {
      // skip empty batches
      if (!rdd.isEmpty()) {
        // offset range consumed by each partition in this batch
        val ranges: Array[OffsetRange] = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
        ranges.foreach(println)

        // business logic placeholder: print value, partition and offset of each record
        rdd.foreach(t => println((t.value(), t.partition(), t.offset())))

        // write the offsets back to Redis
        // hash key: "<topic>-<groupId>", field: partition, value: untilOffset
        val jedis: Jedis = new Jedis("192.168.15.134", 6379)
        for (t <- ranges) {
          jedis.hset(t.topic + "-" + groupId, t.partition.toString, t.untilOffset.toString)
        }
        jedis.close()
      }
    })
    ssc.start()
    ssc.awaitTermination()
  }
}
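Writing each partition's offset with a separate HSET means a driver failure part-way through the loop can leave the stored offsets inconsistent across partitions. One possible refinement is to queue all HSETs for a batch inside a single Redis MULTI/EXEC transaction so they are applied together. The sketch below assumes the same key layout as above (hash key "<topic>-<groupId>", field = partition, value = untilOffset); the helper RedisOffsetStore is hypothetical and would be called from foreachRDD on the driver in place of the loop above.

import org.apache.spark.streaming.kafka010.OffsetRange
import redis.clients.jedis.Jedis

object RedisOffsetStore {
  // save the offsets of one batch atomically
  def saveOffsets(groupId: String, ranges: Array[OffsetRange]): Unit = {
    val jedis = new Jedis("192.168.15.134", 6379)
    try {
      val tx = jedis.multi() // MULTI: subsequent commands are queued, not executed yet
      ranges.foreach { r =>
        tx.hset(r.topic + "-" + groupId, r.partition.toString, r.untilOffset.toString)
      }
      tx.exec() // EXEC applies all queued HSETs together
    } finally {
      jedis.close()
    }
  }
}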
