/**
 * Merge the values for each key using an associative and commutative reduce function. This will
 * also perform the merging locally on each mapper before sending results to a reducer, similarly
 * to a "combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions.
 */
def reduceByKey(func: (V, V) => V, numPartitions: Int): RDD[(K, V)] = self.withScope {
  // Delegate to the partitioner-based overload, hash-partitioning into the requested
  // number of partitions.
  val partitioner = new HashPartitioner(numPartitions)
  reduceByKey(partitioner, func)
}
import org.apache.spark.Partitioner
/**
 * Custom partitioner that routes phone-number keys to partitions by their three-digit prefix.
 *
 * Keys starting with "137", "138" and "133" map to buckets 1, 2 and 3 respectively;
 * null keys map to bucket 0 and all other keys to bucket 4. The bucket is reduced modulo
 * `num` so the returned id always lies in `[0, numPartitions)`, as the Partitioner
 * contract requires. (The original code could return an id >= numPartitions whenever
 * fewer than 5 partitions were configured, which makes Spark fail at shuffle time.)
 *
 * @param num total number of partitions; must be positive
 */
class MyPartition(num: Int) extends Partitioner {
  require(num > 0, s"Number of partitions must be positive, got $num")

  override def numPartitions: Int = num

  /** Map a key to a partition id in `[0, numPartitions)`. */
  override def getPartition(key: Any): Int = {
    // Pick the logical bucket for the key's prefix.
    val bucket = key match {
      case null => 0
      case k if k.toString.startsWith("137") => 1
      case k if k.toString.startsWith("138") => 2
      case k if k.toString.startsWith("133") => 3
      case _ => 4
    }
    // Clamp into the valid partition range; identical to the original when num >= 5.
    bucket % num
  }
}