SparkStreaming
Spark–Kafka integration (spark-kafka 聯調) notes
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>2.1.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
<version>2.1.1</version>
</dependency>
<dependency>
<!-- kafka-clients is published under org.apache.kafka, not org.apache.spark -->
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>0.11.0.2</version>
</dependency>
package com.song.bigdata.stream
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Word count over messages consumed from Kafka using Spark Streaming's
 * receiver-based 0-8 integration (`KafkaUtils.createStream`).
 *
 * Reads (key, message) pairs from topic "sparkTopic" via ZooKeeper at
 * linux1:2181, splits message values into words, counts words per 3-second
 * batch, and prints each batch's counts to stdout.
 */
object Spark_Kafka {
  def main(args: Array[String]): Unit = {
    // Spark configuration object.
    // NOTE: receiver-based streaming needs at least 2 local threads — one is
    // permanently occupied by the Kafka receiver, the rest process batches.
    // "local" (a single thread) would starve the job: data is received but
    // no batch ever runs. Use "local[*]" (or at least "local[2]").
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("spreakstr")

    // Streaming context: batches are collected and processed every 3 seconds
    // (the micro-batch interval).
    val streamingContext = new StreamingContext(sparkConf, Seconds(3))

    // Alternative source: read lines from a TCP socket.
    //val socketLineDStream = streamingContext.socketTextStream("linux1", 9999)

    // Consume from Kafka (receiver-based API): ZooKeeper quorum, consumer
    // group id, and a map of topic -> number of receiver threads (3 here,
    // matching the topic's partition count). The topic must already exist:
    //   bin/kafka-topics.sh --zookeeper linux:2181 --list
    //   bin/kafka-topics.sh --zookeeper linux:2181 --create --topic sparkTopic --partitions 3 --replication-factor 2
    val kafkaDStream: ReceiverInputDStream[(String, String)] = KafkaUtils.createStream(
      streamingContext,
      "linux1:2181",
      "songGroup",
      Map("sparkTopic" -> 3)
    )

    // Flatten each Kafka record's value (the tuple is (key, message)) into words.
    val wordDStream = kafkaDStream.flatMap(_._2.split(" "))
    // Pair each word with 1 so counts can be aggregated by key.
    val mapDStream = wordDStream.map((_, 1))
    // Sum the 1s per word within each batch.
    val wordToSumDStream = mapDStream.reduceByKey(_ + _)

    // Print each batch's results. The original `println(wordToSumDStream)`
    // only printed the DStream object's toString once at graph-definition
    // time; `print()` is an actual DStream output operation, executed every
    // batch (and without any output operation the job fails to start).
    wordToSumDStream.print()

    // Start the receiver and the streaming computation.
    streamingContext.start()
    // Block the driver until the streaming job is stopped or fails.
    streamingContext.awaitTermination()
  }
}