Flink Development in Five Steps, Step 2: Source

1. Collection/Elements Source Example

package com.second_source

import org.apache.flink.streaming.api.scala._

/**
  * Collection/Elements Source example
  */
object Source_CollectionOrElements {
  def main(args: Array[String]): Unit = {
    // Step 1: set up the execution environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    val arr: Array[String] = Array("name1", "name2", "name3")
    // Step 2: read the data (Source)
    val ds1: DataStream[String] = env.fromCollection(arr)
    val ds2: DataStream[String] = env.fromElements("name1","h","p")
    // Step 5: name the output streams and trigger the streaming job
    ds1.print("stream1")
    ds2.print("stream2")
    env.execute("Source_CollectionOrElements")
  }
}
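
fromCollection and fromElements also accept richer element types. A minimal sketch, using a hypothetical Sensor case class (the implicits pulled in by org.apache.flink.streaming.api.scala._ supply the TypeInformation Flink needs for it):

package com.second_source

import org.apache.flink.streaming.api.scala._

/**
  * Collection Source with a case class element type (sketch)
  */
object Source_CaseClass {
  // Hypothetical element type, used only for this illustration
  case class Sensor(id: String, temperature: Double)

  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // The scala._ import above provides the implicit TypeInformation for Sensor
    val ds: DataStream[Sensor] = env.fromCollection(Seq(Sensor("s1", 35.8), Sensor("s2", 15.4)))
    ds.print("sensors")
    env.execute("Source_CaseClass")
  }
}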

2. Text Source Example

package com.second_source

import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}

/**
  * Text Source example
  */
object Source_Text {
  def main(args: Array[String]): Unit = {
    val environment: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Read the file line by line; each line becomes one String element of the stream
    val ds: DataStream[String] = environment.readTextFile("D:\\flink\\FlinkStudy\\src\\main\\resources\\hello.txt")
    ds.print("stream")
    environment.execute()
  }
}
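
readTextFile accepts any URI that Flink's FileSystem abstraction supports, not only local paths. With the Hadoop filesystem dependencies on the classpath, the same call can read from HDFS; a one-line sketch, assuming a hypothetical NameNode address:

// Hypothetical HDFS path; requires Flink's Hadoop filesystem support on the classpath
val hdfsDS: DataStream[String] = environment.readTextFile("hdfs://namenode:9000/data/hello.txt")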

3. Socket Source Example

package com.second_source

import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
/**
  * Socket Source example
  */
object Source_Socket {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Connect to a text socket at localhost:10; the server must already be listening when the job starts
    val ds: DataStream[String] = env.socketTextStream("localhost", 10)
    ds.print("stream")
    env.execute()
  }
}
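
Before launching the job, a text server must already be listening on that host and port, for example nc -lk 10 in a terminal (ports below 1024 usually require elevated privileges). Every line typed into netcat then arrives as one String element of the stream.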

4. Kafka Source Example

package com.second_source

import java.util.Properties
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011

/**
  * Kafka Source example
  */
object Source_Kafka {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Create the Kafka-backed DataStream
    val kafkaDS: DataStream[String] = env.addSource(getKafkaConsumer())

    kafkaDS.print("stream")
    env.execute()
  }

  /**
    * Create the FlinkKafkaConsumer
    * @return FlinkKafkaConsumer011[String]
    */
  def getKafkaConsumer(): FlinkKafkaConsumer011[String] ={
    // Kafka consumer configuration
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "master:9092")
    properties.setProperty("group.id", "consumer-group")
    properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("auto.offset.reset", "latest")

    // Arguments: the Kafka topic, the deserialization schema, and the Kafka properties;
    // SimpleStringSchema decodes messages as UTF-8 by default
    new FlinkKafkaConsumer011[String]("sensor", new SimpleStringSchema(), properties)
  }
}
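
To feed the job some test data, messages can be pushed into the topic with Kafka's console producer, for example:

kafka-console-producer.sh --broker-list master:9092 --topic sensor

Each line entered there is decoded by SimpleStringSchema and printed by the running job.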

5. Custom Source Example

package com.second_source

import org.apache.flink.streaming.api.functions.source.{RichSourceFunction, SourceFunction}

/**
  * Custom Source: repeatedly emits the numbers 1 to 10 until cancelled
  */
class MyCustomSourceFunction extends RichSourceFunction[String]{
  // Flag indicating whether the source should keep running;
  // @volatile because cancel() is called from a different thread than run()
  @volatile var running: Boolean = true

  override def run(ctx: SourceFunction.SourceContext[String]): Unit = {

    // Values emitted on each pass of the loop
    val arr: Range.Inclusive = 1.to(10)

    while (running) {
      arr.foreach(t => ctx.collect(t.toString))
      // Throttle emission so the output stays readable
      Thread.sleep(100)
    }
  }

  override def cancel(): Unit = {
    running = false
  }
}

package com.second_source

import org.apache.flink.streaming.api.scala._

/**
  * Custom Source example
  */
object Source_Custom {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Plug the custom SourceFunction into the environment
    val customDS: DataStream[String] = env.addSource(new MyCustomSourceFunction)

    customDS.print("stream")

    env.execute()
  }
}
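
A SourceFunction like the one above always runs with parallelism 1. For a source that can run with higher parallelism, extend RichParallelSourceFunction instead; a minimal sketch in which each parallel subtask tags its output with its own index:

package com.second_source

import org.apache.flink.streaming.api.functions.source.{RichParallelSourceFunction, SourceFunction}

/**
  * Parallel custom Source sketch: run() executes once per parallel subtask
  */
class MyParallelSourceFunction extends RichParallelSourceFunction[String] {
  @volatile var running: Boolean = true

  override def run(ctx: SourceFunction.SourceContext[String]): Unit = {
    // Index of this subtask, available through the rich function's runtime context
    val subtask: Int = getRuntimeContext.getIndexOfThisSubtask
    var i = 0
    while (running) {
      ctx.collect(s"subtask-$subtask-value-$i")
      i += 1
      Thread.sleep(100)
    }
  }

  override def cancel(): Unit = {
    running = false
  }
}

It plugs into env.addSource the same way as above, and calling .setParallelism on the returned stream controls how many copies of run() execute.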
