Spark Streaming Integration with Flume

package sparkStreaming.sparkStreamingToFlume

import java.nio.ByteBuffer
import java.nio.charset.StandardCharsets
import java.util

import org.apache.flume.source.avro.AvroFlumeEvent
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.{Milliseconds, StreamingContext}
import org.apache.spark.streaming.flume.{FlumeUtils, SparkFlumeEvent}

/**
  * Official guide: http://spark.apache.org/docs/latest/streaming-flume-integration.html
  */

/** The first way to integrate Spark Streaming with Flume: push-based (Flume pushes events to a Spark Streaming receiver through an Avro sink) */

/**
  * Flume configuration (an avro sink pointing at the Spark Streaming receiver):
  *
  * agent.sinks = avroSink
  * agent.sinks.avroSink.type = avro
  * agent.sinks.avroSink.channel = memoryChannel
  * agent.sinks.avroSink.hostname = <chosen machine's hostname>
  * agent.sinks.avroSink.port = <chosen port on the machine>
  *
  */
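For orientation, here is a minimal complete agent file. Only the avroSink section above comes from the official guide; the netcat source, the agent/channel names, and the concrete hostname/port are illustrative assumptions chosen to match the Scala code below.

# hypothetical agent config: source type, names, hostname and port are assumptions
agent.sources = netcatSrc
agent.channels = memoryChannel
agent.sinks = avroSink

# netcat source: forwards anything typed into `telnet localhost 44444`
agent.sources.netcatSrc.type = netcat
agent.sources.netcatSrc.bind = localhost
agent.sources.netcatSrc.port = 44444
agent.sources.netcatSrc.channels = memoryChannel

agent.channels.memoryChannel.type = memory
agent.channels.memoryChannel.capacity = 1000

# avro sink pushes events to the Spark Streaming receiver
agent.sinks.avroSink.type = avro
agent.sinks.avroSink.channel = memoryChannel
agent.sinks.avroSink.hostname = hostname
agent.sinks.avroSink.port = 8899

Start the agent with the standard Flume CLI, e.g. flume-ng agent --conf conf --conf-file spark-push.conf --name agent. In push mode the chosen hostname must be a machine where a Spark worker runs, and the Spark Streaming application must already be up (receiver listening) before the agent starts pushing.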

/***
  *
  * Maven dependency (the _2.12 suffix must match your project's Scala version):
  * <dependency>
  * <groupId>org.apache.spark</groupId>
  * <artifactId>spark-streaming-flume_2.12</artifactId>
  * <version>2.4.4</version>
  * </dependency>
  *
  */

/**
  * Scala code (from the official guide):
  * import org.apache.spark.streaming.flume._
  *
  * val flumeStream = FlumeUtils.createStream(streamingContext, [chosen machine's hostname], [chosen port])
  *
  */
object FlumePushStreaming {

  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName(this.getClass.getSimpleName)
    val ssc: StreamingContext = new StreamingContext(conf, Milliseconds(5000))

    // In push mode the receiver listens on this hostname/port, and Flume's
    // avro sink must point at the same address.
    val flumeStream: ReceiverInputDStream[SparkFlumeEvent] = FlumeUtils.createStream(ssc, "hostname", 8899)

    flumeStream.map(e => {
      val event: AvroFlumeEvent = e.event
      val headers: util.Map[CharSequence, CharSequence] = event.getHeaders
      // the body arrives as an Avro ByteBuffer; decode it into a string
      val body: ByteBuffer = event.getBody
      val bodyStr: String = StandardCharsets.UTF_8.decode(body).toString
      (headers, bodyStr)
    }).print()

    // without start()/awaitTermination() the streaming job never runs
    ssc.start()
    ssc.awaitTermination()
  }

}
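The comment above calls this "the first way" because the official guide also documents a second, pull-based approach, where Spark polls a custom transactional sink instead of Flume pushing to a receiver, which gives stronger reliability guarantees. A minimal sketch, assuming org.apache.spark.streaming.flume.sink.SparkSink (from the spark-streaming-flume-sink artifact) is configured on the Flume agent and listening on the hostname/port given here:

// pull-based alternative: Spark polls Flume's custom SparkSink;
// hostname and port are assumptions and must match the sink's config
val pollingStream: ReceiverInputDStream[SparkFlumeEvent] =
  FlumeUtils.createPollingStream(ssc, "hostname", 8899)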

Summary: this integration sees little use in practice, because the push-based receiver cannot guarantee exactly-once processing. Kafka as the source, combined with MySQL transactions, gives far more precise delivery control.
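As a sketch of that Kafka-plus-MySQL pattern (not part of this Flume example; the topic, table names, and JDBC URL are hypothetical, and it assumes the spark-streaming-kafka-0-10 direct API): write each batch's results and its Kafka offsets in one MySQL transaction, so a batch either commits fully or is retried from the same offsets.

import java.sql.DriverManager
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.kafka010._

val kafkaParams = Map[String, Object](
  "bootstrap.servers" -> "broker:9092",                 // assumption
  "key.deserializer" -> classOf[StringDeserializer],
  "value.deserializer" -> classOf[StringDeserializer],
  "group.id" -> "demo-group",                           // assumption
  "enable.auto.commit" -> (false: java.lang.Boolean)    // offsets managed manually
)

val stream = KafkaUtils.createDirectStream[String, String](
  ssc,
  LocationStrategies.PreferConsistent,
  ConsumerStrategies.Subscribe[String, String](Seq("demo-topic"), kafkaParams))

stream.foreachRDD { rdd =>
  val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
  val results = rdd.map(_.value()).collect()            // assumes small batches

  // one transaction covers both the results and the offsets, so a failed
  // batch rolls back atomically and is reprocessed from the old offsets
  val conn = DriverManager.getConnection("jdbc:mysql://db:3306/demo", "user", "pass")
  try {
    conn.setAutoCommit(false)
    val insert = conn.prepareStatement("INSERT INTO events(body) VALUES (?)")
    results.foreach { v => insert.setString(1, v); insert.executeUpdate() }
    val saveOffset = conn.prepareStatement(
      "REPLACE INTO kafka_offsets(topic, part, untilOffset) VALUES (?, ?, ?)")
    offsetRanges.foreach { o =>
      saveOffset.setString(1, o.topic)
      saveOffset.setInt(2, o.partition)
      saveOffset.setLong(3, o.untilOffset)
      saveOffset.executeUpdate()
    }
    conn.commit()
  } catch {
    case e: Exception => conn.rollback(); throw e
  } finally {
    conn.close()
  }
}

On restart, the stored offsets can be read back from kafka_offsets and passed to the ConsumerStrategies.Subscribe overload that accepts a Map[TopicPartition, Long], so processing resumes exactly where the last committed transaction ended.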
