spark_kafka

Spark Streaming

Spark-Kafka integration

Add the Maven dependencies below; the _2.11 suffix and the 2.1.1 version must match the Scala and Spark versions of your build:

<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.11</artifactId>
    <version>2.1.1</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
    <version>2.1.1</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>0.11.0.2</version>
</dependency>
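With the dependencies in place, a receiver-based word-count job against Kafka looks like this: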
package com.song.bigdata.stream

import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

object Spark_Kafka {
  def main(args: Array[String]): Unit = {
    // Spark configuration object. Receiver-based streams need at least
    // two local threads (one for the receiver, one for processing), so
    // "local" alone would starve the job; use local[*] or local[2].
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("SparkKafkaStreaming")
    // Real-time processing context.
    // Batch interval: data is collected and processed every 3 seconds.
    val streamingContext = new StreamingContext(sparkConf, Seconds(3))

    // Collect data from a socket instead:
    //val socketLineDStream = streamingContext.socketTextStream("linux1", 9999)

    // Collect data from Kafka with the receiver-based API.
    // The topic must already exist, e.g.:
    //   bin/kafka-topics.sh --zookeeper linux1:2181 --list
    //   bin/kafka-topics.sh --zookeeper linux1:2181 --create --topic sparkTopic --partitions 3 --replication-factor 2
    val kafkaDStream: ReceiverInputDStream[(String, String)] = KafkaUtils.createStream(
      streamingContext,
      "linux1:2181",          // ZooKeeper quorum
      "songGroup",            // consumer group id
      Map("sparkTopic" -> 3)  // topic -> number of receiver threads
    )

    // Split each Kafka message value into words (flatten).
    val wordDStream = kafkaDStream.flatMap(t => t._2.split(" "))
    // Reshape each word into a (word, 1) pair for counting.
    val mapDStream = wordDStream.map((_, 1))
    // Aggregate the pairs by key to get per-batch word counts.
    val wordToSumDStream = mapDStream.reduceByKey(_ + _)
    // Print each batch's results. Note: println(wordToSumDStream) would
    // only print the DStream's toString; print() is the output operation
    // that actually triggers the computation.
    wordToSumDStream.print()
    // Start the receiver.
    streamingContext.start()
    // Keep the driver alive while the receiver runs.
    streamingContext.awaitTermination()

  }

}
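The three-argument createStream above defaults to String decoders and StorageLevel.MEMORY_AND_DISK_SER_2. If you need to pass extra consumer properties (for example auto.offset.reset), the 0-8 connector also offers an overload that takes a kafkaParams map. A minimal sketch, reusing the same ZooKeeper address, group id, and topic:

import kafka.serializer.StringDecoder
import org.apache.spark.storage.StorageLevel

// Consumer properties for the Kafka 0.8 high-level consumer.
val kafkaParams = Map(
  "zookeeper.connect" -> "linux1:2181",
  "group.id"          -> "songGroup",
  "auto.offset.reset" -> "smallest" // start from the earliest offset on first run
)
val kafkaDStream = KafkaUtils.createStream[String, String, StringDecoder, StringDecoder](
  streamingContext,
  kafkaParams,
  Map("sparkTopic" -> 3),
  StorageLevel.MEMORY_AND_DISK_SER_2
)

To smoke-test the job, send a few lines to the topic with the console producer (assuming a broker is listening on linux1:9092):

bin/kafka-console-producer.sh --broker-list linux1:9092 --topic sparkTopic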