自定義累加器
/**
 * Custom Spark accumulator that keeps a running person count and a running
 * sum of ages, both stored in a single {@link MyKey}.
 *
 * <p>A custom accumulator extends {@code AccumulatorV2<IN, OUT>}; here both the
 * type being added and the type being returned are {@link MyKey}.
 */
public class MyAccumulator extends AccumulatorV2<MyKey, MyKey> implements Serializable {

    /** Current accumulated state; a fresh instance starts at the zero value (0, 0). */
    private MyKey info = new MyKey(0, 0);

    /**
     * Returns the internal state object (not a copy).
     *
     * @return the {@link MyKey} currently backing this accumulator
     */
    public MyKey getInfo() {
        return info;
    }

    /**
     * Replaces the internal state object.
     *
     * @param info the new backing state
     */
    public void setInfo(MyKey info) {
        this.info = info;
    }

    /**
     * Reports whether this accumulator is still at its zero value, i.e. both
     * the person count and the age sum are 0.
     *
     * <p>Both fields are unboxed for the comparison, so a state whose fields
     * are {@code null} would throw {@link NullPointerException} here.
     *
     * @return {@code true} when both counters equal 0
     */
    @Override
    public boolean isZero() {
        return info.getPersonAgeSum() == 0 && info.getPersonNum() == 0;
    }

    /**
     * Creates an independent copy of this accumulator carrying the same
     * counter values.
     *
     * @return a new {@code MyAccumulator} whose state equals, but is not
     *         shared with, this one
     */
    @Override
    public AccumulatorV2<MyKey, MyKey> copy() {
        MyAccumulator duplicate = new MyAccumulator();
        // Copy the counters into a fresh MyKey. Handing over the same MyKey
        // instance would make add()/merge() on the copy silently mutate this
        // accumulator as well, because MyKey is mutable.
        duplicate.info = new MyKey(info.getPersonNum(), info.getPersonAgeSum());
        return duplicate;
    }

    /**
     * Resets this accumulator to its zero value by swapping in a fresh
     * {@code MyKey(0, 0)}; afterwards {@link #isZero()} returns {@code true}.
     */
    @Override
    public void reset() {
        info = new MyKey(0, 0);
    }

    /**
     * Folds one incoming record into the running totals.
     *
     * @param v the record to add; its person count and age sum are added to
     *          the current totals
     */
    @Override
    public void add(MyKey v) {
        info.setPersonNum(info.getPersonNum() + v.getPersonNum());
        info.setPersonAgeSum(info.getPersonAgeSum() + v.getPersonAgeSum());
    }

    /**
     * Merges the totals held by another accumulator into this one, in place.
     *
     * @param other the accumulator whose {@code value()} is added to this
     *              accumulator's totals
     */
    @Override
    public void merge(AccumulatorV2<MyKey, MyKey> other) {
        MyKey value = other.value();
        info.setPersonNum(info.getPersonNum() + value.getPersonNum());
        info.setPersonAgeSum(info.getPersonAgeSum() + value.getPersonAgeSum());
    }

    /**
     * Returns the current accumulated totals.
     *
     * @return the internal {@link MyKey} state (the live object, not a snapshot)
     */
    @Override
    public MyKey value() {
        return info;
    }
}
自定義key
/**
 * Serializable value holder pairing a person count with a sum of ages.
 *
 * <p>Both properties are boxed {@link Integer}s and are {@code null} when the
 * no-argument constructor is used and no setter has been called.
 */
public class MyKey implements Serializable {

    /** Number of people counted so far. */
    private Integer personNum;

    /** Sum of the ages of the people counted so far. */
    private Integer personAgeSum;

    /** Creates an instance with both properties left as {@code null}. */
    public MyKey() {
        this(null, null);
    }

    /**
     * Creates an instance with both properties set.
     *
     * @param personNum    initial person count
     * @param personAgeSum initial sum of ages
     */
    public MyKey(Integer personNum, Integer personAgeSum) {
        this.personNum = personNum;
        this.personAgeSum = personAgeSum;
    }

    /** @return the person count, possibly {@code null} */
    public Integer getPersonNum() {
        return this.personNum;
    }

    /** @param personNum the new person count */
    public void setPersonNum(Integer personNum) {
        this.personNum = personNum;
    }

    /** @return the sum of ages, possibly {@code null} */
    public Integer getPersonAgeSum() {
        return this.personAgeSum;
    }

    /** @param personAgeSum the new sum of ages */
    public void setPersonAgeSum(Integer personAgeSum) {
        this.personAgeSum = personAgeSum;
    }

    /**
     * Renders both properties in the form
     * {@code MyKey{personNum=<n>, personAgeSum=<s>}}.
     */
    @Override
    public String toString() {
        return String.format("MyKey{personNum=%s, personAgeSum=%s}", personNum, personAgeSum);
    }
}
運行:
/**
 * Demo driver: registers a {@link MyAccumulator} with a local Spark context,
 * feeds it one {@link MyKey} per "name age" record, and prints the totals.
 */
public class MyRun {

    /**
     * Runs the accumulator demo on a local master and prints
     * {@code value = <accumulated MyKey>} to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setAppName("testAccumulator")
                .setMaster("local");

        // try-with-resources closes the context even if the job throws,
        // instead of leaving it open until the JVM exits.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            MyAccumulator acc = new MyAccumulator();
            sc.sc().register(acc, "PersonInfoAccumulator");

            // Each record is "<name> <age>", separated by a single space.
            JavaRDD<String> rdd = sc.parallelize(Arrays.asList(
                    "zhangsan 1", "lisi 2", "wangwu 3", "zhaoliu 4", "tianqi 5", "zhengba 6"
            ));

            // Update the accumulator inside an action (foreach) rather than a
            // transformation (map): Spark only guarantees that each task's
            // update is applied exactly once for updates made inside actions.
            // This also avoids collecting data the driver never uses.
            rdd.foreach(line -> acc.add(new MyKey(1, Integer.parseInt(line.split(" ")[1]))));

            System.out.println("value = " + acc.value());
        }
    }
}
結果:
value = MyKey{personNum=6, personAgeSum=21}