一.簡介
二.代碼實戰
package cn.streaming
import org.apache.flink.streaming.api.scala._
import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
/**
 * Streaming-iteration demo: reads whitespace-separated integer scores from a socket and
 * keeps decrementing every score that is not below `Bound1` until it drops below it.
 *
 * Each score travels through the job as `(score, Bound1, Bound2, Bound3, count)`.
 * Records whose score is below `Bound1` leave the loop as `(score, Bound1, count)`;
 * all other records are decremented by one and fed back into the loop.
 *
 * Optional command-line arguments (parsed with `ParameterTool`):
 *  - `--hostname`      socket host to read from (default `192.168.136.7`)
 *  - `--port`          socket port to read from (default `9999`)
 *  - `--maxWaitMillis` maximum time the iteration head waits for feedback records before it
 *                      terminates; `0` (default) means wait indefinitely
 */
object DataStreamTest {
  import scala.util.Try

  private final val Bound1 = 60
  private final val Bound2 = 80
  private final val Bound3 = 90

  private final val DefaultHostname = "192.168.136.7"
  private final val DefaultPort = 9999
  // 0 keeps the iteration head alive for the whole job. A positive value makes the head
  // terminate once no feedback record arrived for that long, after which later records are
  // no longer iterated.
  private final val DefaultMaxWaitMillis = 0L

  /** Returns true when `score` is strictly below `bound`. */
  private def isBelow(score: Int, bound: Int): Boolean = score < bound

  /**
   * Splits one input line on whitespace and keeps only the tokens that parse as `Int`.
   * Empty or malformed tokens are skipped so that a stray blank line on the socket does not
   * fail the whole job with a `NumberFormatException`.
   */
  private def parseScores(line: String): Seq[Int] =
    line.split("\\s+").toSeq.flatMap(token => Try(token.toInt).toOption.toList)

  /**
   * Builds and runs the streaming job.
   *
   * @param args command-line arguments; see the object documentation for the supported options
   */
  def main(args: Array[String]): Unit = {
    val params = ParameterTool.fromArgs(args)
    val env = StreamExecutionEnvironment.getExecutionEnvironment.setBufferTimeout(1)
    env.getConfig.setGlobalJobParameters(params)

    // Socket endpoint and iteration timeout; overridable from the command line.
    val hostname: String = params.get("hostname", DefaultHostname)
    val port: Int = params.getInt("port", DefaultPort)
    val maxWaitMillis: Long = params.getLong("maxWaitMillis", DefaultMaxWaitMillis)

    // Input stream of (score, count) pairs, one pair per integer token read from the socket.
    val inputStream: DataStream[(Int, Int)] = env
      .socketTextStream(hostname, port, '\n')
      .flatMap((line: String) => parseScores(line))
      .map(score => (score, 1))

    val output: DataStream[(Int, Int, Int)] = inputStream
      .map(value => (value._1, Bound1, Bound2, Bound3, value._2))
      .iterate(
        // The step function is invoked once, while the job graph is being built; it only
        // wires up the operators. Per-record work happens inside filter/map below.
        (iteration: DataStream[(Int, Int, Int, Int, Int)]) => {
          // Scores not yet below the bound: decrement by one and send around the loop again.
          val feedback = iteration
            .filter(value => !isBelow(value._1, value._2))
            .map(r => (r._1 - 1, r._2, r._3, r._4, r._5))
          feedback.print()

          // Scores below the bound: leave the loop as (score, bound, count).
          val finished = iteration
            .filter(value => isBelow(value._1, value._2))
            .map(r => (r._1, r._2, r._5))

          (feedback, finished)
        },
        maxWaitMillis
      )

    output.print()
    env.execute("Streaming Iteration Example")
  }
}
三.入參及執行效果
1.socket
2.執行結果
3.原理分析
程序首先執行輸出邏輯【也就是不需要迭代那部分】,打印出來兩條分數小於60的數據;其餘數據進入反饋流,每迭代一次分數減1,直至不滿足迭代條件(即分數小於60)爲止。需要注意:iterate 的步驟函數只在構建作業圖時被調用一次,若在其中調用 Thread.sleep(100),它也只會執行一次,並不會讓迭代每100ms執行一次——迭代是由反饋流中的數據驅動的。
4.異常分析
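經驗證,迭代只會在首次觸發時執行,後續滿足觸發條件也不會觸發!原因很可能在於 iterate 的第二個參數 maxWaitTimeMillis:它並不是「每5s計算一次」,而是迭代頭等待反饋數據的最長時間。設爲 5000L 時,迭代頭若連續5秒收不到反饋數據就會結束,此後到達的數據便不再被迭代;設爲 0(默認值)則會一直等待。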