轉載作者:Alen-Gao
轉載地址:https://blog.csdn.net/sonicgyq_gyq/article/details/79239946
在實際操作中,sortBy 算子的預設排序往往不能滿足按多個欄位排序的需求,這時就需要使用自定義排序。以下介紹兩種簡單的自定義排序實現方法,以起到拋磚引玉的作用。
第一種方法:(Ordered:讓作為排序鍵的類繼承 Ordered 並實現 compare 方法)
- package cn.allengao.IpSearch
- import org.apache.spark.{SparkConf, SparkContext}
- /**
- * class_name:
- * package:
- * describe: 自定義排序(先比較攻擊值,值高的排前面;攻擊值相同時再比較防守值,防守值高的排前面)
- * creat_user: Allen Gao
- * creat_date: 2018/2/2
- * creat_time: 11:01
- **/
/**
 * Demo of custom RDD sorting through a key type that implements `Ordered`.
 *
 * Each record is a `(name, attack, defense)` tuple. The records are sorted in
 * descending order of `Player(attack, defense)`, i.e. higher attack first and,
 * when attack values are equal, higher defense first.
 */
object CustomSort {

  /**
   * Builds a local SparkContext, sorts the sample player records and prints them.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CustomSort").setMaster("local[*]")
    val sc = new SparkContext(conf)
    // Stop the context in `finally` so it is released even when the job throws.
    try {
      // Player records: name, attack value, defense value.
      val playerInfo =
        sc.parallelize(Array(("Kobe", 98, 95), ("O'Neal", 98, 96), ("Harden", 95, 95)))
      // The Player key carries its own ordering (it extends Ordered), so sortBy
      // picks it up implicitly; ascending = false puts the highest key first.
      val res = playerInfo.sortBy(x => Player(x._2, x._3), ascending = false)
      println(res.collect().toBuffer)
      // Output: ArrayBuffer((O'Neal,98,96), (Kobe,98,95), (Harden,95,95))
    } finally {
      sc.stop()
    }
  }
}
- //第一種方式:使用 case class 繼承 Ordered(Ordered 需要實現 compare 方法)
/**
 * Sort key for a player: ordered by attack value first and, when the attack
 * values are equal, by defense value. The natural order is ascending, so
 * callers that want the strongest player first must sort in descending order.
 *
 * @param attack  attack value, the primary sort field
 * @param defense defense value, the tie-breaker when attack values are equal
 */
case class Player(attack: Int, defense: Int) extends Ordered[Player] with Serializable {

  /**
   * Compares this player with `that`.
   *
   * @param that the player to compare against
   * @return a negative value, zero, or a positive value when this player sorts
   *         before, equal to, or after `that`
   */
  override def compare(that: Player): Int = {
    // Integer.compare instead of subtraction: `a - b` overflows when the operands
    // are far apart (e.g. Int.MinValue - 1) and would yield the wrong sign.
    val byAttack = Integer.compare(this.attack, that.attack)
    if (byAttack != 0) byAttack else Integer.compare(this.defense, that.defense)
  }
}
第二種方法:(Ordering:提供一個 implicit 隱式值)
- package cn.allengao.IpSearch
- import org.apache.spark.{SparkConf, SparkContext}
- /**
- * class_name:
- * package:
- * describe: 自定義排序(先比較攻擊值,值高的排前面;攻擊值相同時再比較防守值,防守值高的排前面)
- * creat_user: Allen Gao
- * creat_date: 2018/2/2
- * creat_time: 11:01
- **/
/**
 * Holds the implicit ordering for `Player_1`; bring it into scope with
 * `import MySort._` before sorting by a `Player_1` key.
 */
object MySort {

  /**
   * Orders players by attack value first and, when the attack values are equal,
   * by defense value (ascending natural order).
   *
   * Returns 0 for players with equal attack and defense, as the `Ordering`
   * contract requires. An `implicit object` extending `Ordering[Player_1]` would
   * be an equivalent way to provide this instance.
   */
  implicit val playerOrdering: Ordering[Player_1] = new Ordering[Player_1] {
    override def compare(x: Player_1, y: Player_1): Int = {
      val byAttack = Integer.compare(x.attack, y.attack)
      // Fall back to defense only when the attack values tie.
      if (byAttack != 0) byAttack else Integer.compare(x.defense, y.defense)
    }
  }
}
/**
 * Demo of custom RDD sorting through an implicit `Ordering` for the key type.
 *
 * Each record is a `(name, attack, defense)` tuple. The records are sorted in
 * descending order of `Player_1(attack, defense)` using the ordering imported
 * from `MySort`.
 */
object CustomSort_1 {

  /**
   * Builds a local SparkContext, sorts the sample player records and prints them.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CustomSort").setMaster("local[*]")
    val sc = new SparkContext(conf)
    // Stop the context in `finally` so it is released even when the job throws.
    try {
      // Player records: name, attack value, defense value.
      val playerInfo =
        sc.parallelize(Array(("Kobe", 98, 95), ("O'Neal", 98, 96), ("Harden", 95, 95)))
      // Player_1 has no ordering of its own; this import supplies the implicit
      // Ordering[Player_1] that sortBy requires.
      import MySort._
      val res = playerInfo.sortBy(x => Player_1(x._2, x._3), ascending = false)
      println(res.collect().toBuffer)
      // Output: ArrayBuffer((O'Neal,98,96), (Kobe,98,95), (Harden,95,95))
    } finally {
      sc.stop()
    }
  }
}
- //第二種方式:使用隱式值(Ordering 需要在作用域內提供一個隱式值)
/**
 * Plain sort key for a player. It defines no ordering of its own; an
 * `Ordering[Player_1]` has to be supplied separately wherever it is sorted.
 *
 * @param attack  attack value
 * @param defense defense value
 */
case class Player_1(
  attack: Int,
  defense: Int
) extends Serializable