Flink核心概念——State、StateBackend

用于测试的CustomSourceFunction

/**
  * Test source that emits one random `(key, value)` pair about once per second
  * until it is cancelled.
  *
  * Keys are picked from "a".."g"; values are random integers in [0, 10).
  */
class CustomSourceFunction extends RichSourceFunction[(String, Int)]{

  // Written by cancel() and read by the emit loop in run(). Those two methods are
  // driven by different threads, so the flag has to be volatile for the loop to be
  // guaranteed to observe the update and terminate.
  @volatile var flag = true

  /**
    * Emits random pairs through `ctx` until [[cancel]] clears the running flag.
    *
    * @param ctx source context used to emit the generated records
    */
  override def run(ctx: SourceFunction.SourceContext[(String, Int)]): Unit = {
    val arr: Array[String] = Array("a", "b", "c", "d", "e", "f", "g")
    val random: Random = new Random()
    while (flag) {
      // Throttle the source to roughly one record per second.
      Thread.sleep(1000)
      // Pick a random key from the array.
      val key: String = arr(random.nextInt(arr.length))
      val rightNow: Int = random.nextInt(10)
      ctx.collect((key, rightNow))
    }
  }

  /** Requests termination of the emit loop in [[run]]. */
  override def cancel(): Unit = {
    flag = false
  }
}

State

OperatorState(算子状态):作用范围限定为算子任务。

ListState
UnionListState
BroadcastState

KeyedState(键控状态):根据输入数据流中定义的键(key)来维护和访问的状态。

ValueState

/**
  * Keyed `ValueState` demo: for every key, remembers the largest value seen so far
  * and emits a record only when a strictly larger value arrives.
  */
object ValueStateDemo {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    val customSourceFunction: CustomSourceFunction2 = new CustomSourceFunction2

    val stream: DataStream[(String, Long)] = env.addSource(customSourceFunction)

    // Key by the first tuple field so that the state below is scoped per key.
    stream.keyBy(0)
      .flatMap(flatMapWithState)
      .print("stream")

    env.execute()
  }

  /**
    * Builds a stateful flat-map function that tracks the maximum value per key.
    *
    * The first record of a key is always emitted and stored. Later records are
    * emitted (and replace the stored maximum) only when their value is strictly
    * greater than the stored one; otherwise a message is printed and nothing is
    * emitted.
    *
    * @return a `RichFlatMapFunction` emitting `key----value` for each new maximum
    */
  def flatMapWithState: RichFlatMapFunction[(String, Long), String] = {
    new RichFlatMapFunction[(String, Long), String] {
      // Deliberately typed as java.lang.Long: ValueState.value() yields null when no
      // value has been stored yet, and a scala.Long would silently unbox that null
      // to 0L, making "no state" indistinguishable from a stored 0.
      private var timeState: ValueState[java.lang.Long] = _

      override def open(parameters: Configuration): Unit = {
        // Register / look up the per-key state handle.
        timeState = getRuntimeContext.getState(
          new ValueStateDescriptor[java.lang.Long]("maxTime", classOf[java.lang.Long]))
      }

      override def flatMap(value: (String, Long), out: Collector[String]): Unit = {
        // None => nothing stored for this key yet, so the current record is the max.
        val isNewMax: Boolean =
          Option(timeState.value()).forall(previous => value._2 > previous.longValue())

        if (isNewMax) {
          // Store the new maximum and emit the record.
          timeState.update(java.lang.Long.valueOf(value._2))
          out.collect(value._1 + "----" + value._2)
        } else {
          // Not a new maximum: leave the state untouched.
          println("没有更新")
        }
      }
    }
  }
}

ListState

/**
  * Keyed `ListState` demo: per key, accumulates every reading greater than 5 and,
  * for each incoming record, emits the full list accumulated so far for that key.
  */
object ListStateDemo {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    val source: CustomSourceFunction = new CustomSourceFunction
    val stream: DataStream[(String, Int)] = env.addSource(source)

    // Partition by the first tuple field, apply the stateful function, print results.
    val keyed = stream.keyBy(0)
    val collected = keyed.flatMap(flatMapWithState)
    collected.print("stream")

    env.execute()
  }

  /**
    * Builds the stateful flat-map function used by this demo.
    *
    * Readings above 5 are appended to the key's list state as `key---value`;
    * other readings are only reported on stdout. In both cases the current
    * contents of the list state are emitted as an immutable `List`.
    *
    * @return a `RichFlatMapFunction` emitting the accumulated entries per record
    */
  def flatMapWithState: RichFlatMapFunction[(String, Int), List[String]] = {
    new RichFlatMapFunction[(String, Int), List[String]] {
      var timeState: ListState[String] = _

      override def open(parameters: Configuration): Unit = {
        // Obtain the list state handle registered under the name "listState".
        val descriptor = new ListStateDescriptor[String]("listState", classOf[String])
        timeState = getRuntimeContext.getListState(descriptor)
      }

      override def flatMap(value: (String, Int), out: Collector[List[String]]): Unit = {
        val (key, reading) = value

        if (reading <= 5) {
          println(key + "===" + reading + "非异常数据")
        } else {
          timeState.add(key + "---" + reading)
        }

        // Copy the state's contents into a Scala list before emitting.
        val snapshot = ListBuffer.empty[String]
        val entries = timeState.get().iterator()
        while (entries.hasNext) {
          snapshot += entries.next()
        }
        out.collect(snapshot.toList)
      }
    }
  }
}

MapState

类似于ListState

ReducingState&AggregatingState

/**
  * Keyed `ReducingState` demo: per key, keeps the largest value seen so far and
  * emits that running maximum for every incoming record.
  */
object ReducingStateDemo {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    val source: CustomSourceFunction = new CustomSourceFunction
    val stream: DataStream[(String, Int)] = env.addSource(source)

    // Partition by the first tuple field, apply the stateful map, print results.
    val keyed = stream.keyBy(0)
    val maxima = keyed.map(mapWithState)
    maxima.print("stream")

    env.execute()
  }

  /**
    * Builds the stateful map function used by this demo.
    *
    * A `ReducingState` carries its own `ReduceFunction`: every value added to the
    * state is combined with the stored one by that function. This contrasts with
    * `ValueState`, where the combining logic lives in the operator itself.
    *
    * @return a `RichMapFunction` that maps each record to the key's running maximum
    */
  def mapWithState: RichMapFunction[(String, Int), Int] = {
    new RichMapFunction[(String, Int), Int] {
      var timeState: ReducingState[Int] = _

      override def open(parameters: Configuration): Unit = {
        // Reducer that keeps the larger of the stored and the newly added value.
        val keepLarger = new ReduceFunction[Int] {
          override def reduce(value1: Int, value2: Int): Int = math.max(value1, value2)
        }
        val descriptor =
          new ReducingStateDescriptor[Int]("reducingState", keepLarger, classOf[Int])
        timeState = getRuntimeContext.getReducingState(descriptor)
      }

      override def map(value: (String, Int)): Int = {
        // Fold the new value into the state, then read back the reduced result.
        timeState.add(value._2)
        timeState.get()
      }
    }
  }
}

State Backend 状态后端

概念

(原文此处有一张图片,未能保留。)

State Backend 状态后端

生产配置

// State backend configuration.
    // There are two levels: application-level and cluster-level configuration.
    // Application-level configuration:
    // MemoryStateBackend does not need to be configured explicitly.
    // NOTE(review): the two setStateBackend calls below appear to be alternatives;
    // presumably only one should be kept, since a later call would replace the
    // earlier setting - confirm before copying this snippet.
    // Configure FsStateBackend (the argument is a placeholder for a file:// or hdfs:// URI).
    env.setStateBackend(new FsStateBackend("本地文件系统file:// 或者 HDFS文件系统hdfs://"))
    // Configure RocksDBStateBackend (the argument is a placeholder for an hdfs:// URI).
    env.setStateBackend(new RocksDBStateBackend("hdfs://"))
    // Cluster-level configuration goes in flink-conf.yaml. The cluster is usually
    // operated by someone else rather than set up by yourself, so application-level
    // configuration is the practical choice.
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章