Spark Custom AccumulatorV2

1. Overview

  • For the basic usage of AccumulatorV2 and its caveats, see the previous post: https://blog.csdn.net/wtzhm/article/details/86481846
  • In real development a custom Accumulator is often needed. Maintaining many separate Accumulators is expensive: if a piece of logic changes, for example how the number of sessions is computed, every corresponding Accumulator and its accumulation code has to be modified, and adding a few more ranges means adding yet more Accumulators and more accumulation code. Consolidating the counters into a single custom Accumulator keeps this maintenance cost down.

2. Example

  • Define a class that extends the AccumulatorV2 abstract class and implement its six abstract methods (isZero, copy, reset, add, merge and value):

     package com.spark.zhmcode.session
     
     import com.spark.zhmcode.utils.MyStringUtils
     import org.apache.spark.util.AccumulatorV2
     
     /**
       * The first type parameter is the input type, the second is the output type
       */
     class MyCustomerAcculatorv2 extends AccumulatorV2[String, String] {
     
         var result = "session1=0|session2=0|session3=0|session4=0"
     
         /**
           * isZero: whether this accumulator is still in its initial (zero) state
           */
         override def isZero: Boolean = {
             result == "session1=0|session2=0|session3=0|session4=0"
         }
     
         /**
           * Create and return a copy of this accumulator
           */
         override def copy(): AccumulatorV2[String, String] = {
             val accumulator = new MyCustomerAcculatorv2()
             accumulator.result = this.result
             accumulator
         }
     
         /**
           * Reset this accumulator to its initial (zero) state
           */
         override def reset(): Unit = {
             result = "session1=0|session2=0|session3=0|session4=0"
         }
     
         /**
           * Accumulate one input value into the result string
           */
         override def add(v: String): Unit = {
             val v1 = result
             val v2 = v
             if (MyStringUtils.isNotEmpty(v1) && MyStringUtils.isNotEmpty(v2)) {
                 // look up the current count of the field named by v2 and increment it;
                 // if the field does not exist, the result string is left unchanged
                 val oldValue = MyStringUtils.getFieldFromConcatString(v1, "\\|", v2)
                 if (oldValue != null) {
                     val newValue = oldValue.toInt + 1
                     result = MyStringUtils.setFieldFromConcatString(v1, "\\|", v2, newValue)
                 }
             }
         }
     
         /**
           * Merge another accumulator of the same type into this one
           */
         override def merge(other: AccumulatorV2[String, String]): Unit = other match {
             case acc: MyCustomerAcculatorv2 =>
                 // sum the per-field counts of the other accumulator into this one instead of overwriting it
                 for (fieldAndValue <- acc.value.split("\\|").map(_.split("=")) if fieldAndValue.length == 2) {
                     val merged = MyStringUtils.getFieldFromConcatString(result, "\\|", fieldAndValue(0)).toInt + fieldAndValue(1).toInt
                     result = MyStringUtils.setFieldFromConcatString(result, "\\|", fieldAndValue(0), merged)
                 }
             case _ =>
                 throw new UnsupportedOperationException(
                     s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}")
         }
     
         /**
           * The current value exposed outside the accumulator
           */
         override def value: String = {
             result
         }
     
     
     }
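
  • As a quick, Spark-free sanity check (not part of the original post), the six methods can be exercised locally. The object name below is made up for illustration, and it assumes the MyStringUtils helper shown in the next step is available:

      // Hypothetical local check: mimics what Spark does with a registered accumulator
      object MyCustomerAcculatorv2Check {
          def main(args: Array[String]): Unit = {
              val driverAcc = new MyCustomerAcculatorv2()    // the copy that stays on the driver
              val taskAcc = driverAcc.copy()                 // each task works on its own copy
              taskAcc.reset()                                // task copies start from the zero value
              Seq("session1", "session3", "session3").foreach(taskAcc.add)
              driverAcc.merge(taskAcc)                       // Spark merges task copies back into the driver copy
              println(driverAcc.isZero)                      // false, counts have been accumulated
              println(driverAcc.value)                       // session1=1|session2=0|session3=2|session4=0
          }
      }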
    
  • The MyStringUtils utility class

     package com.spark.zhmcode.utils
     
      
     object MyStringUtils {
     
          /**
            * Extract the value of a field from a concatenated string
            *
            * @param str       e.g. "session1=0|session2=0|session3=0|session4=0"
            * @param delimiter the delimiter, as a regex (e.g. "\\|")
            * @param field     the field name
            * @return the field value, or "0" if the field is not present
            */
          def getFieldFromConcatString(str: String, delimiter: String, field: String): String = {
              val fields = str.split(delimiter)
              var result = "0"
              for (concatField <- fields) {
                  val fieldAndValue = concatField.split("=")
                  if (fieldAndValue.length == 2) {
                      val key = fieldAndValue(0)
                      val value = fieldAndValue(1)
                      if (field.equals(key)) {
                          result = value
                      }
                  }
              }
              result
          }
     
     
          /**
            * Set the value of a field in a concatenated string
            *
            * @param str       e.g. "session1=0|session2=0|session3=0|session4=0"
            * @param delimiter the delimiter, as a regex (e.g. "\\|")
            * @param field     the field name
            * @param newValue  the new value for the field
            * @return the concatenated string with the field updated, or the original string if the field is not present
            */
          def setFieldFromConcatString(str: String, delimiter: String, field: String, newValue: Integer): String = {
              val buffer = new StringBuffer("")
              val fields = str.split(delimiter)
              var isExist = false
              for (concatField <- fields) {
                  val fieldAndValue = concatField.split("=")
                  if (fieldAndValue.length == 2) {
                      val key = fieldAndValue(0)
                      val value = fieldAndValue(1)
                      if (field.equals(key)) {
                          buffer.append(key).append("=").append(newValue).append("|")
                          isExist = true
                      } else {
                          buffer.append(key).append("=").append(value).append("|")
                      }
                  }
              }

              if (isExist) {
                  // drop the trailing delimiter appended in the loop
                  val newResult = buffer.toString
                  newResult.substring(0, newResult.length - 1)
              } else {
                  str
              }
          }
     
     
          /**
            * Check whether a string is non-empty
            *
            * @param str the string
            * @return true if the string is neither null nor empty
            */
          def isNotEmpty(str: String): Boolean = str != null && str.nonEmpty
     
     
     }
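
  • As a quick illustration (not from the original post), the two helpers behave as follows on the accumulator's string format; the values in the comments follow from the implementation above:

       // Hypothetical usage of the helpers above
       val s = "session1=0|session2=0|session3=0|session4=0"
       MyStringUtils.getFieldFromConcatString(s, "\\|", "session2")      // "0"
       MyStringUtils.setFieldFromConcatString(s, "\\|", "session2", 5)   // "session1=0|session2=5|session3=0|session4=0"
       MyStringUtils.setFieldFromConcatString(s, "\\|", "unknown", 5)    // s is returned unchanged because the field does not exist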
    
  • Create the custom Accumulator and be sure to register it with the SparkContext, otherwise using it in a task throws an exception.

      package com.spark.zhmcode.session
      
      import org.apache.spark.{SparkConf, SparkContext}
      
      object MyAccumulator {
          def main(args: Array[String]): Unit = {
              val conf = new SparkConf().setMaster("local[2]").setAppName("MyAccumulator")
              val sc = new SparkContext(conf)
              val data = List("session1","session2","session3","session4","session1","session3","session3","session3","a","b","c","d")
              val rdd1 = sc.parallelize(data,1)
              val accumulator = new MyCustomerAcculatorv2()
              sc.register(accumulator,"countSession")
               rdd1.foreach(x => {
                   accumulator.add(x)
               })
               println("Custom accumulator result: " + accumulator.value)
              sc.stop()
          }
      }
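
  • With the data above (session1 appears twice, session2 once, session3 four times, session4 once, while "a"/"b"/"c"/"d" match no field and therefore leave the string unchanged), the printed value should be session1=2|session2=1|session3=4|session4=1.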
    