flink自定義函數數據處理

1.自定義普通函數

 /**
  * Entry point of the plain-function example: reads `/station.log` from the classpath,
  * parses every line into a `StationLog`, keeps the records whose `callType` is
  * "success", renders each with `CallMapFunction` and prints the result.
  *
  * @param args command-line arguments (not used)
  */
 def main(args: Array[String]): Unit = {
    // Set up the Flink streaming execution environment with a parallelism of 1.
    val streamEnv: StreamExecutionEnvironment =
      StreamExecutionEnvironment.getExecutionEnvironment
    streamEnv.setParallelism(1)
    // Bring the Scala API implicits into scope; the original author recommends importing
    // them here to avoid IDEA code-completion problems.
    import org.apache.flink.streaming.api.scala._

    // Resolve the input file from the classpath and parse each comma-separated line.
    val logPath = getClass.getResource("/station.log").getPath
    val data = streamEnv.readTextFile(logPath).map { line =>
      val cols = line.split(",")
      // Trimmed access to the i-th column of the current line.
      def col(i: Int): String = cols(i).trim
      new StationLog(col(0), col(1), col(2), col(3), col(4).toLong, col(5).toLong)
    }

    // Timestamp layout used in the printed output.
    val format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")

    // Keep only the successful calls, then describe and print them.
    val successfulCalls = data.filter(log => log.callType.equals("success"))
    val described = successfulCalls.map(new CallMapFunction(format))
    described.print()

    streamEnv.execute()
  }

  //自定義的函數類
  /**
   * Map function that turns a `StationLog` into a one-line textual description of the
   * call: caller, callee, start time and end time.
   *
   * @param format formatter applied to the start and end timestamps
   */
  class CallMapFunction(format: SimpleDateFormat) extends
    // Type parameters: input record type, output type.
    MapFunction[StationLog, String] {
    /**
     * Describes one call record.
     *
     * @param t the call record to describe
     * @return the description string containing both numbers and both formatted timestamps
     */
    override def map(t: StationLog): String = {
      val startMillis = t.callTime
      // NOTE(review): presumably callTime is epoch milliseconds and duration is in
      // seconds, hence the * 1000 - confirm against the StationLog definition.
      val endMillis = startMillis + t.duration * 1000
      val startText = format.format(new Date(startMillis))
      val endText = format.format(new Date(endMillis))
      "主叫號碼:" + t.callOut +
        ",被叫號碼:" + t.callInt +
        ",呼叫起始時間:" + startText +
        ",呼叫結束時間:" + endText
    }
  }

2.自定義富函數類

/**
 * Rich-function example: reads `/station.log` from the classpath, keeps the records whose
 * `callType` is "success", and replaces the caller/callee phone numbers with the names
 * found in the `t_phone` table of a MySQL database before printing the records.
 */
object RichFunctionClassTransformation {
  /**
   * Builds and runs the streaming job.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // Set up the Flink streaming execution environment with a parallelism of 1.
    val streamEnv: StreamExecutionEnvironment =
      StreamExecutionEnvironment.getExecutionEnvironment
    streamEnv.setParallelism(1)
    // Bring the Scala API implicits into scope; the original author recommends importing
    // them here to avoid IDEA code-completion problems.
    import org.apache.flink.streaming.api.scala._
    // Read the file and parse each comma-separated line into a StationLog.
    val data = streamEnv.readTextFile(getClass.getResource("/station.log").getPath)
      .map(line => {
        val arr = line.split(",")
        new
            StationLog(arr(0).trim, arr(1).trim, arr(2).trim, arr(3).trim, arr(4).trim.toLong, arr(5).trim.toLong)
      })

    // Keep only the successful calls and enrich them with subscriber names.
    data.filter(_.callType.equals("success"))
      .map(new CallRichMapFunction())
      .print()
    streamEnv.execute()
  }

  /**
   * Rich map function that looks up subscriber names over JDBC. The connection and the
   * prepared statement are created in `open` and released in `close`.
   */
  class CallRichMapFunction() extends RichMapFunction[StationLog, StationLog] {
    var conn: Connection = _
    var pst: PreparedStatement = _

    /**
     * Lifecycle hook: opens the database connection and prepares the lookup statement.
     *
     * @param parameters configuration handed in by Flink (not used here)
     */
    override def open(parameters: Configuration): Unit = {
      // NOTE(review): connection URL and credentials are hard-coded; consider moving them
      // to job configuration.
      conn = DriverManager.getConnection("jdbc:mysql://localhost/test", "root", "123123")
      pst = conn.prepareStatement("select name from t_phone where phone_number =?")
    }

    /**
     * Replaces `callOut` and `callInt` of the record with the names found in the database;
     * a number without a matching row is left unchanged.
     *
     * @param in the record to enrich (mutated in place)
     * @return the same record instance
     */
    override def map(in: StationLog): StationLog = {
      // Look up the caller's name.
      lookupName(in.callOut).foreach(name => in.callOut = name)
      // Look up the callee's name.
      lookupName(in.callInt).foreach(name => in.callInt = name)
      in
    }

    /**
     * Runs the prepared lookup for one phone number and always closes the result set.
     *
     * @param phoneNumber value bound to the statement's single parameter
     * @return the first column of the first row, or `None` when the query returns no row
     */
    private def lookupName(phoneNumber: String): Option[String] = {
      pst.setString(1, phoneNumber)
      val rows: ResultSet = pst.executeQuery()
      try {
        // Some(...) rather than Option(...): a matching row with a NULL name is still
        // applied to the record, exactly as before.
        if (rows.next()) Some(rows.getString(1)) else None
      } finally {
        rows.close()
      }
    }

    /**
     * Lifecycle hook: releases the statement and the connection. Null checks keep this
     * safe when `open` did not complete, and the connection is closed even if closing
     * the statement throws.
     */
    override def close(): Unit = {
      try {
        if (pst != null) pst.close()
      } finally {
        if (conn != null) conn.close()
      }
    }
  }
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章