Spark 高級編程(二):二次排序

目錄

 

(1)二次排序文本樣式

(2)解決思路

(3)代碼


(1)二次排序文本樣式

含義:每行文本為以空格分隔的兩個整數;先按第一列排序,如果第一列相同,則再按照第二列排序。

(2)解決思路

 * 1、實現自定義的key,要實現Ordered接口和Serializable接口,在key中實現自己對多個列的排序算法
 * 2、將包含文本的RDD,映射成key爲自定義key,value爲文本的JavaPairRDD
 * 3、使用sortByKey算子按照自定義的key進行排序
 * 4、再次映射,剔除自定義的key,只保留文本行

 

(3)代碼

package cn.spark.study.core;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;

import scala.Tuple2;

/**
 * Secondary sort: orders text lines by their first integer column and, when
 * the first column ties, by their second integer column.
 *
 * Steps:
 * 1. A custom key ({@link SecondarySortKey}) implements Ordered and
 *    Serializable and carries the multi-column comparison logic.
 * 2. The RDD of text lines is mapped to a JavaPairRDD whose key is the custom
 *    key and whose value is the original line.
 * 3. sortByKey sorts the pairs by the custom key.
 * 4. A final map drops the key and keeps only the text line.
 *
 * @author Administrator
 */
public class SecondarySort {

	/** Input file used when no path is supplied on the command line. */
	private static final String DEFAULT_INPUT_PATH = "C://Users//Administrator//Desktop//sort.txt";

	/**
	 * Runs the secondary sort job and prints the sorted lines.
	 *
	 * @param args optional; {@code args[0]} overrides the input file path,
	 *             otherwise {@link #DEFAULT_INPUT_PATH} is read
	 */
	public static void main(String[] args) {
		String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;

		SparkConf conf = new SparkConf()
				.setAppName("SecondarySort")
				.setMaster("local");
		JavaSparkContext sc = new JavaSparkContext(conf);

		// Close the context even if a stage fails (e.g. a malformed line
		// raising NumberFormatException), so it is never left open.
		try {
			JavaRDD<String> lines = sc.textFile(inputPath);

			// Step 2: pair every line with a key built from its first two columns.
			JavaPairRDD<SecondarySortKey, String> pairs = lines.mapToPair(

					new PairFunction<String, SecondarySortKey, String>() {

						private static final long serialVersionUID = 1L;

						@Override
						public Tuple2<SecondarySortKey, String> call(String line) throws Exception {
							// Trim and split on whitespace runs so stray or repeated
							// spaces do not yield empty, unparsable columns.
							String[] columns = line.trim().split("\\s+");
							SecondarySortKey key = new SecondarySortKey(
									Integer.parseInt(columns[0]),
									Integer.parseInt(columns[1]));
							// The value keeps the original, unmodified line.
							return new Tuple2<SecondarySortKey, String>(key, line);
						}

					});

			// Step 3: sort by the custom key's ordering.
			JavaPairRDD<SecondarySortKey, String> sortedPairs = pairs.sortByKey();

			// Step 4: discard the key, keep only the text line.
			JavaRDD<String> sortedLines = sortedPairs.map(

					new Function<Tuple2<SecondarySortKey, String>, String>() {

						private static final long serialVersionUID = 1L;

						@Override
						public String call(Tuple2<SecondarySortKey, String> pair) throws Exception {
							return pair._2;
						}

					});

			sortedLines.foreach(new VoidFunction<String>() {

				private static final long serialVersionUID = 1L;

				@Override
				public void call(String sortedLine) throws Exception {
					System.out.println(sortedLine);
				}

			});
		} finally {
			sc.close();
		}
	}

}
package cn.spark.study.core;

import java.io.Serializable;

import scala.math.Ordered;

/**
 * Custom key for secondary sorting: orders by {@code first}, then by
 * {@code second} when the {@code first} values are equal.
 *
 * All comparison methods are derived from {@link #compare(SecondarySortKey)},
 * which compares without subtraction so it cannot overflow.
 *
 * @author Administrator
 */
public class SecondarySortKey implements Ordered<SecondarySortKey>, Serializable {

	private static final long serialVersionUID = -2366006422945129991L;
	
	// The columns that take part in the ordering.
	private int first;
	private int second;
	
	/**
	 * @param first  primary sort column
	 * @param second secondary sort column, used only to break ties on {@code first}
	 */
	public SecondarySortKey(int first, int second) {
		this.first = first;
		this.second = second;
	}

	/** @return true when this key sorts strictly after {@code other} */
	@Override
	public boolean $greater(SecondarySortKey other) {
		return compare(other) > 0;
	}
	
	/** @return true when this key sorts after {@code other} or both columns are equal */
	@Override
	public boolean $greater$eq(SecondarySortKey other) {
		return compare(other) >= 0;
	}

	/** @return true when this key sorts strictly before {@code other} */
	@Override
	public boolean $less(SecondarySortKey other) {
		return compare(other) < 0;
	}
	
	/** @return true when this key sorts before {@code other} or both columns are equal */
	@Override
	public boolean $less$eq(SecondarySortKey other) {
		return compare(other) <= 0;
	}
	
	/**
	 * Compares by {@code first}, falling back to {@code second} on a tie.
	 *
	 * Uses {@link Integer#compare(int, int)} rather than subtraction:
	 * {@code a - b} overflows when the operands are far apart (for example
	 * {@code Integer.MIN_VALUE - 1}) and then reports the wrong sign.
	 *
	 * @param other the key to compare against
	 * @return a negative value, zero, or a positive value when this key is
	 *         less than, equal to, or greater than {@code other}
	 */
	@Override
	public int compare(SecondarySortKey other) {
		int byFirst = Integer.compare(this.first, other.getFirst());
		if (byFirst != 0) {
			return byFirst;
		}
		return Integer.compare(this.second, other.getSecond());
	}
	
	/** Same ordering as {@link #compare(SecondarySortKey)}. */
	@Override
	public int compareTo(SecondarySortKey other) {
		return compare(other);
	}
	
	// Getters and setters for the sort columns, followed by hashCode and equals.
	public int getFirst() {
		return first;
	}

	public void setFirst(int first) {
		this.first = first;
	}

	public int getSecond() {
		return second;
	}

	public void setSecond(int second) {
		this.second = second;
	}

	@Override
	public int hashCode() {
		final int prime = 31;
		int result = 1;
		result = prime * result + first;
		result = prime * result + second;
		return result;
	}

	/** Two keys are equal when they are the same class and both columns match. */
	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		if (obj == null || getClass() != obj.getClass()) {
			return false;
		}
		SecondarySortKey other = (SecondarySortKey) obj;
		return first == other.first && second == other.second;
	}
	
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章