Java:
package cn.spark.sparktest;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

import java.util.List;

public class top3 {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("top3");
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaRDD<String> lines = sc.textFile("C://Users//Desktop//top.txt");
        // Map each line to a (number, line) pair so the numeric value can serve as the sort key
        JavaPairRDD<Integer, String> pairs = lines.mapToPair(new PairFunction<String, Integer, String>() {
            @Override
            public Tuple2<Integer, String> call(String s) throws Exception {
                return new Tuple2<Integer, String>(Integer.valueOf(s), s);
            }
        });
        // Sort by key in descending order
        JavaPairRDD<Integer, String> sortPair = pairs.sortByKey(false);
        // Keep only the first element (the key) of each pair
        JavaRDD<Integer> result = sortPair.map(new Function<Tuple2<Integer, String>, Integer>() {
            @Override
            public Integer call(Tuple2<Integer, String> i) throws Exception {
                return i._1;
            }
        });
        // Take the first 3 elements of the sorted RDD, i.e. the 3 largest values
        List<Integer> sortList = result.take(3);
        for (Integer n : sortList) {
            System.out.println(n);
        }
        sc.close();
    }
}
Test:
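For illustration only, assuming top.txt contains one integer per line, say 3, 7, 1, 9 and 5 (a hypothetical input, not from the original), the program prints the three largest values:

9
7
5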
Scala:
package cn.spark.study.core

import org.apache.spark.{SparkConf, SparkContext}

object top3 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local")
      .setAppName("top3")
    val sc = new SparkContext(conf)
    val lines = sc.textFile("C://Users//Desktop//top.txt")
    // Pair each line with its numeric value so it can serve as the sort key
    val sort = lines.map(sort => (sort.toInt, sort))
    // Sort by key in descending order
    val result = sort.sortByKey(false)
    // Keep only the key (the number itself)
    val mid = result.map(x => x._1)
    // Take the first 3 elements, i.e. the 3 largest values
    val top = mid.take(3)
    for (num <- top) {
      println(num)
    }
    sc.stop()
  }
}
Test:
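As a side note, Spark's RDD API also provides top(n), which returns the n largest elements under the implicit ordering and avoids the explicit pair-and-sort steps above. A minimal sketch under the same one-integer-per-line assumption (the object name top3Concise is illustrative, not from the original):

package cn.spark.study.core

import org.apache.spark.{SparkConf, SparkContext}

object top3Concise {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("top3Concise")
    val sc = new SparkContext(conf)
    // top(3) takes the 3 largest values directly, without a full sortByKey + take
    sc.textFile("C://Users//Desktop//top.txt")
      .map(_.toInt)
      .top(3)
      .foreach(println)
    sc.stop()
  }
}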