package utils

import org.apache.commons.pool2.impl.GenericObjectPoolConfig
import redis.clients.jedis.{Jedis, JedisPool}

/**
 * Creates a Jedis connection pool.
 */
object Jpools {

  private val poolConfig = new GenericObjectPoolConfig()
  poolConfig.setMaxIdle(5)     // maximum number of idle connections
  poolConfig.setMaxTotal(2000) // maximum number of connections the pool supports

  // The pool itself is not exposed outside this object
  private lazy val jedisPool = new JedisPool(poolConfig, "hadoop01") // default port 6379

  /**
   * Exposes a method for borrowing a connection from the pool.
   * @return a Jedis connection with database 2 selected
   */
  def getJedis: Jedis = {
    val jedis = jedisPool.getResource
    jedis.select(2)
    jedis
  }
}
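One caveat with getJedis: the caller is responsible for closing the connection, and an exception thrown between getResource and close() would leak it out of the pool. Below is a minimal loan-pattern sketch; the JedisLoan object and the withJedis name are hypothetical, not part of the original code:

package utils

import redis.clients.jedis.Jedis

// Hypothetical helper, not part of the original code: borrows a connection,
// applies the given function, and always returns the connection to the pool.
object JedisLoan {
  def withJedis[T](f: Jedis => T): T = {
    val jedis = Jpools.getJedis
    try f(jedis)
    finally jedis.close() // close() on a pooled Jedis returns it to the pool
  }
}

With this helper, the foreachPartition body in the consumer below could be written as JedisLoan.withJedis(jedis => item.foreach(tp => jedis.hincrBy("Word", tp._1, tp._2))) and the connection is returned even if a Redis command fails.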
package kafka2
import java.lang
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import utils.Jpools
object DataConsumerToRedis {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("redis").setMaster("local[*]")
    conf.set("spark.streaming.kafka.maxRatePerPartition", "2")
    conf.set("spark.streaming.stopGracefullyOnShutdown", "true")
    val ssc = new StreamingContext(conf, Seconds(2))

    // Define a consumer group
    val groupid = "day_002"

    // Kafka consumer configuration
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "hadoop01:9092,hadoop02:9092,hadoop03:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> groupid,
      "auto.offset.reset" -> "earliest",
      //"auto.commit.interval.ms" -> "1000", // commit offsets every second; the default is 5 seconds
      "enable.auto.commit" -> (false: lang.Boolean) // whether to commit offsets automatically
    )
    // Create a Kafka direct stream
    val stream = KafkaUtils.createDirectStream(
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Array("wordcount"), kafkaParams)
    )
    // For each micro-batch RDD of the DStream: count words and write the counts to Redis
    stream.foreachRDD(rdd => {
      rdd.map(crd => (crd.value(), 1)).reduceByKey(_ + _).foreachPartition(item => {
        // Borrow a Jedis connection from the pool
        val jedis = Jpools.getJedis
        item.foreach(tp => {
          jedis.hincrBy("Word", tp._1, tp._2)
        })
        jedis.close() // return the connection to the pool
      })
    })
    ssc.start()
    ssc.awaitTermination()
  }
}
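Because enable.auto.commit is false and the job never commits offsets itself, the consumer group has no committed position, so with auto.offset.reset set to earliest every restart reprocesses the topic from the beginning. A minimal sketch of committing offsets per batch through the CanCommitOffsets API of spark-streaming-kafka-0-10 follows; committing after the Redis writes is one possible policy, not something the original code specifies:

import org.apache.spark.streaming.kafka010.{CanCommitOffsets, HasOffsetRanges}

stream.foreachRDD(rdd => {
  // Capture this batch's offset ranges before any shuffle re-partitions the RDD
  val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
  rdd.map(crd => (crd.value(), 1)).reduceByKey(_ + _).foreachPartition(item => {
    val jedis = Jpools.getJedis
    item.foreach(tp => jedis.hincrBy("Word", tp._1, tp._2))
    jedis.close()
  })
  // Asynchronously commit the batch's offsets back to Kafka
  stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
})

Note that this gives at-least-once semantics: if the job fails after the Redis writes but before the commit, the batch is replayed, and hincrBy would then double-count those words.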