Spark Streaming: updateStateByKey in Scala

import org.apache.spark.streaming._
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.DStream

// Stateful streaming word count: reads space-separated text from a socket and
// keeps a running total per word across micro-batches via updateStateByKey.

// local[3]: three local threads for the driver-side local master.
val conf = new SparkConf().setAppName("WordCountOnLine").setMaster("local[3]")

// One micro-batch every 5 seconds.
val ssc = new StreamingContext(conf, Seconds(5))

// Text lines received from host "spark1", port 9999.
val receiverInputStream = ssc.socketTextStream("spark1", 9999)
val words = receiverInputStream.flatMap(_.split(" "))
val pairs = words.map(word => (word, 1))

// A checkpoint directory must be set; updateStateByKey cannot run without it.
ssc.checkpoint("hdfs://spark1:9000/world_count/checkpoint")

// Merges the counts seen for a word in the current batch into its previous
// running total (0 when the word has no state yet). It always returns Some,
// so a word's state is never dropped once it has been seen.
val updateRunningCount: (Seq[Int], Option[Int]) => Option[Int] =
  (values, state) => Some(state.getOrElse(0) + values.sum)

// Compare against the previous state and update the state value per key.
val wordCount: DStream[(String, Int)] = pairs.updateStateByKey[Int](updateRunningCount)

wordCount.print()

ssc.start()
ssc.awaitTermination()
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章