Flink核心概念——State、StateBackend

用於測試的CustomSourceFunction

/**
  * Test source that emits one random `(key, value)` pair per second until it is cancelled.
  *
  * The key is picked at random from the letters "a" to "g"; the value is a random
  * integer in the range [0, 10).
  */
class CustomSourceFunction extends RichSourceFunction[(String, Int)]{

  // Written by cancel() and read by the loop in run(). Flink's SourceFunction contract
  // has cancel() invoked from a different thread than run(), so the flag has to be
  // volatile for the loop to reliably observe the update and terminate.
  @volatile var flag = true

  /**
    * Emits one record per second for as long as the source has not been cancelled.
    *
    * @param ctx source context used to emit the generated records
    */
  override def run(ctx: SourceFunction.SourceContext[(String, Int)]): Unit = {
    val arr: Array[String] = Array("a", "b", "c", "d", "e", "f", "g")
    val random: Random = new Random()
    while (flag) {
      Thread.sleep(1000)
      // Pick a random key from the array
      val key: String = arr(random.nextInt(arr.length))
      val rightNow: Int = random.nextInt(10)
      ctx.collect((key, rightNow))
    }
  }

  /** Requests termination; the loop in run() exits on its next check of the flag. */
  override def cancel(): Unit = {
    flag = false
  }
}

State

OperatorState 算子狀態的作用範圍限定爲算子任務

ListState
UnionListState
BroadcastState

KeyedState 根據輸入數據流中定義的鍵(key)來維護和訪問

ValueState

/**
  * Demonstrates keyed `ValueState`: for every key the largest value seen so far is kept
  * in state, and a record is emitted only when a new maximum arrives.
  */
object ValueStateDemo {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // NOTE(review): CustomSourceFunction2 is not defined in this section; presumably it is
    // a (String, Long) variant of CustomSourceFunction -- confirm it exists elsewhere.
    val customSourceFunction: CustomSourceFunction2 = new CustomSourceFunction2

    val stream: DataStream[(String, Long)] = env.addSource(customSourceFunction)

    stream.keyBy(0)
      .flatMap(flatMapWithState)
      .print("stream")

    env.execute()
  }

  /**
    * Stateful flatMap function that tracks the maximum value per key.
    *
    * @return a function that emits "key----value" whenever `value` exceeds the maximum
    *         stored for the current key (or when nothing has been stored for it yet),
    *         and prints a notice otherwise
    */
  def flatMapWithState: RichFlatMapFunction[(String, Long), String] = {
    new RichFlatMapFunction[(String, Long), String] {
      // Held as the boxed java.lang.Long on purpose: with scala.Long an empty state (null)
      // is silently unboxed to 0L, which makes "no value yet" indistinguishable from 0 and
      // caused the first record of a key to be dropped unless its value was > 0.
      private var timeState: ValueState[java.lang.Long] = _

      override def open(parameters: Configuration): Unit = {
        // Initialise the state handle
        timeState = getRuntimeContext.getState(
          new ValueStateDescriptor[java.lang.Long]("maxTime", classOf[java.lang.Long]))
      }

      override def flatMap(value: (String, Long), out: Collector[String]): Unit = {
        val (key, time) = value

        // None means nothing has been stored for this key yet, so the first record
        // always counts as a new maximum.
        val isNewMax: Boolean = Option(timeState.value()).forall(prev => time > prev.longValue)

        if (isNewMax) {
          // Remember the new maximum and emit the record
          timeState.update(Long.box(time))
          out.collect(key + "----" + time)
        } else {
          // Not larger than the stored maximum: nothing to emit
          println("沒有更新")
        }
      }
    }
  }
}

ListState

/**
  * Demonstrates keyed `ListState`: records whose value is greater than 5 are appended to a
  * per-key list, and the full list for the key is emitted after every incoming record.
  */
object ListStateDemo {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    val stream: DataStream[(String, Int)] = env.addSource(new CustomSourceFunction)

    stream
      .keyBy(0)
      .flatMap(flatMapWithState)
      .print("stream")

    env.execute()
  }

  /**
    * Stateful flatMap function backed by a `ListState[String]`.
    *
    * @return a function that stores "key---value" for values above 5, prints a notice for
    *         all other values, and always emits the current contents of the list state
    */
  def flatMapWithState: RichFlatMapFunction[(String, Int), List[String]] =
    new RichFlatMapFunction[(String, Int), List[String]] {
      var anomalies: ListState[String] = _

      override def open(parameters: Configuration): Unit = {
        // Register the list state under the name "listState"
        val descriptor = new ListStateDescriptor[String]("listState", classOf[String])
        anomalies = getRuntimeContext.getListState(descriptor)
      }

      override def flatMap(value: (String, Int), out: Collector[List[String]]): Unit = {
        val (key, number) = value

        if (number <= 5) {
          println(key + "===" + number + "非異常數據")
        } else {
          anomalies.add(key + "---" + number)
        }

        // Drain the Java iterator into an immutable Scala list
        val stored = anomalies.get().iterator()
        val snapshot: List[String] =
          Iterator.continually(stored).takeWhile(_.hasNext).map(_.next()).toList
        out.collect(snapshot)
      }
    }
}

MapState

用法類似於ListState:通過 getRuntimeContext.getMapState 配合 MapStateDescriptor 獲取,區別在於狀態以鍵值對(key-value)的形式保存。

ReducingState&AggregatingState

/**
  * Demonstrates keyed `ReducingState`: every incoming value is folded into the state with
  * a "keep the larger one" reduce function, and the reduced value is emitted per record.
  */
object ReducingStateDemo {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    env
      .addSource(new CustomSourceFunction)
      .keyBy(0)
      .map(mapWithState)
      .print("stream")

    env.execute()
  }

  /**
    * Stateful map function backed by a `ReducingState[Int]`.
    *
    * Unlike `ValueState`, where the update logic lives in the calling code, the reduce
    * logic here is supplied with the state descriptor and is applied each time a value
    * is added to the state.
    *
    * @return a function mapping each record to the reduced (maximum) value for its key
    */
  def mapWithState: RichMapFunction[(String, Int), Int] =
    new RichMapFunction[(String, Int), Int] {
      var maxState: ReducingState[Int] = _

      override def open(parameters: Configuration): Unit = {
        // Reducer that keeps the larger of the two values
        val keepLarger = new ReduceFunction[Int] {
          override def reduce(value1: Int, value2: Int): Int = math.max(value1, value2)
        }
        val descriptor =
          new ReducingStateDescriptor[Int]("reducingState", keepLarger, classOf[Int])
        maxState = getRuntimeContext.getReducingState(descriptor)
      }

      override def map(value: (String, Int)): Int = {
        maxState.add(value._2)
        maxState.get()
      }
    }
}

State Backend 狀態後端

概念

(原圖缺失)State Backend 負責本地狀態的存儲、訪問與維護,並在 checkpoint 時將狀態寫入持久化存儲。

State Backend 狀態後端

生產配置

// State backend configuration
    // There are two levels: application-level and cluster-level configuration.
    // Application-level configuration:
    // MemoryStateBackend does not need to be configured explicitly.
    // NOTE(review): the two setStateBackend calls below target the same env, so presumably
    // only the last one takes effect -- keep just the one you need.
    // Configure FsStateBackend (the argument is a placeholder for a file:// or hdfs:// URI)
    env.setStateBackend(new FsStateBackend("本地文件系統file:// 或者 HDFS文件系統hdfs://"))
    // Configure RocksDBStateBackend (the argument is a placeholder for an hdfs:// URI)
    env.setStateBackend(new RocksDBStateBackend("hdfs://"))
    // Cluster-level configuration lives in flink-conf.yaml. Production clusters are usually
    // operated by someone else rather than set up yourself, so prefer the application-level
    // configuration shown above.
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章