import org.apache.hadoop.hive.ql.exec.NumericUDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
/**
 * Sample SUM aggregate function: totals the non-null double values it is
 * given and produces {@code null} when no non-null value was ever seen.
 */
public class UDAFSum_Sample extends NumericUDAF {

    /**
     * Aggregation state for one SUM computation: a running total plus a flag
     * that records whether any non-null value has been folded in yet.
     */
    public static class Evaluator implements UDAFEvaluator {
        /** True until the first non-null value is accumulated. */
        private boolean empty = true;
        /** Running total of every non-null value accumulated so far. */
        private double sum = 0;

        /** Creates an evaluator in the empty state (total 0, nothing seen). */
        public Evaluator() {
            // Initial state comes from the field initializers. init() is
            // deliberately not invoked here so that construction never
            // dispatches to an overridable method.
        }

        /** Resets the evaluator to the empty state (total 0, nothing seen). */
        public void init() {
            sum = 0;
            empty = true;
        }

        /**
         * Folds one input value into the running total.
         *
         * @param o the value to add; {@code null} is ignored
         * @return always {@code true}
         */
        public boolean iterate(DoubleWritable o) {
            return accumulate(o);
        }

        /**
         * Returns the partial aggregation result.
         *
         * @return the running total, or {@code null} if nothing was accumulated
         */
        public DoubleWritable terminatePartial() {
            return currentResult();
        }

        /**
         * Folds a partial result (as produced by {@link #terminatePartial()})
         * into the running total.
         *
         * @param o the partial sum to add; {@code null} is ignored
         * @return always {@code true}
         */
        public boolean merge(DoubleWritable o) {
            return accumulate(o);
        }

        /**
         * Returns the final aggregation result.
         *
         * @return the running total, or {@code null} if nothing was accumulated
         */
        public DoubleWritable terminate() {
            return currentResult();
        }

        /** Adds a non-null value to the total and marks the state non-empty. */
        private boolean accumulate(DoubleWritable value) {
            if (value != null) {
                sum += value.get();
                empty = false;
            }
            return true;
        }

        /** Wraps the total in a new writable, or yields null when empty. */
        private DoubleWritable currentResult() {
            // This is SQL standard - sum of zero items should be null.
            if (empty) {
                return null;
            }
            return new DoubleWritable(sum);
        }
    }
}
- 將java文件編譯成Sum_Sample.jar
- 進入hive
hive> create temporary function sum_test as 'com.hrj.hive.udf.UDAFSum_Sample';
hive> select sum_test(t.num) from t;
hive> drop temporary function sum_test;
hive> quit;
- 需要import org.apache.hadoop.hive.ql.exec.UDAF(或如本例使用其子類NumericUDAF)以及org.apache.hadoop.hive.ql.exec.UDAFEvaluator,這兩個類都是必須的
- 函數類需要繼承UDAF類(本例繼承的NumericUDAF即UDAF的子類),內部類Evaluator實現UDAFEvaluator接口
- Evaluator需要實現 init、iterate、terminatePartial、merge、terminate這幾個函數
- init函數類似於構造函數,用於UDAF的初始化
- iterate接收傳入的參數,並進行內部的輪轉。其返回類型爲boolean
- terminatePartial無參數,其在iterate函數輪轉結束後,返回輪轉數據,iterate和terminatePartial類似於hadoop的Combiner
- merge接收terminatePartial的返回結果,進行數據merge操作,其返回類型爲boolean
- terminate返回最終的聚集函數結果