package com.**.udf;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.lazy.LazyString;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.io.Text;
;
import java.util.ArrayList;
import java.util.List;
/**
 * Hive generic UDF that picks the version id in effect at a given event time.
 *
 * <p>Call shape: {@code GetVersionId(versionIds, versionDts, eventTime)} where
 * {@code versionIds} and {@code versionDts} are parallel {@code array<string>} values and
 * {@code eventTime} is a {@code string}. The date list is scanned from its last element to
 * its first, and the id at the first position whose date is less than or equal to
 * {@code eventTime} (by {@link String#compareTo(String)}) is returned. If the event time
 * precedes every date, the first id is returned; if the date list is empty, the empty
 * string is returned.
 *
 * <p>NOTE(review): the backwards scan presumably relies on {@code versionDts} being sorted
 * ascending in a lexicographically comparable format — confirm against the callers.
 *
 * <p>List elements are read through the element object inspector of each list rather than
 * by casting to a concrete class, so the same code path serves every string representation
 * (for example {@code Text}, {@code LazyString} or {@code String}) without relying on a
 * {@code ClassCastException} to choose between them.
 */
public final class GetVersionId extends GenericUDF {

    /** Inspector for argument 1, the list of version ids. */
    private ListObjectInspector versionIdListInspector;

    /** Inspector for argument 2, the list of version dates. */
    private ListObjectInspector versionDtListInspector;

    /** Inspector for the elements of the version id list. */
    private StringObjectInspector versionIdInspector;

    /** Inspector for the elements of the version date list. */
    private StringObjectInspector versionDtInspector;

    /** Inspector for argument 3, the event time. */
    private StringObjectInspector eventTimeInspector;

    /**
     * Validates the argument count and types and captures the inspectors used by
     * {@link #evaluate(DeferredObject[])}.
     *
     * @param arguments inspectors for the three call arguments
     * @return the Java string object inspector, because the UDF returns a {@code String}
     * @throws UDFArgumentLengthException if the number of arguments is not three
     * @throws UDFArgumentException if an argument is not of the expected type
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        // Validate the argument count.
        if (arguments.length != 3) {
            throw new UDFArgumentLengthException("The function argument is error, and get " + arguments.length);
        }
        versionIdListInspector = requireList(arguments[0], 1);
        versionDtListInspector = requireList(arguments[1], 2);
        versionIdInspector = requireString(
                versionIdListInspector.getListElementObjectInspector(), "Elements of argument 1");
        versionDtInspector = requireString(
                versionDtListInspector.getListElementObjectInspector(), "Elements of argument 2");
        eventTimeInspector = requireString(arguments[2], "Argument 3");
        return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    }

    /**
     * Returns the version id whose date is the latest one not after the event time.
     *
     * @param arguments the version id list, the version date list and the event time
     * @return the selected version id; the first id when the event time precedes every date;
     *         {@code ""} when the date list is empty; {@code null} when any argument is NULL
     *         or the selected id element is NULL
     * @throws HiveException if the id list is too short to hold the selected position, or if
     *         reading a deferred argument fails
     */
    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        Object versionIds = arguments[0].get();
        Object versionDts = arguments[1].get();
        Object eventTimeData = arguments[2].get();
        if (versionIds == null || versionDts == null || eventTimeData == null) {
            return null;
        }
        String eventTime = eventTimeInspector.getPrimitiveJavaObject(eventTimeData);
        if (eventTime == null) {
            return null;
        }

        int dtCount = versionDtListInspector.getListLength(versionDts);
        for (int i = dtCount - 1; i >= 0; i--) {
            String versionDt = elementAsString(versionDtListInspector, versionDtInspector, versionDts, i);
            // NULL dates can never match; skip them instead of failing on compareTo(null).
            if (versionDt != null && eventTime.compareTo(versionDt) >= 0) {
                return versionIdAt(versionIds, i);
            }
        }
        // Nothing matched, so the event time precedes every date: use the first version if any.
        return dtCount > 0 ? versionIdAt(versionIds, 0) : "";
    }

    @Override
    public String getDisplayString(String[] strings) {
        return "this method requires three parameters";
    }

    /** Reads the id at {@code index}, failing clearly when the id list is too short. */
    private String versionIdAt(Object versionIds, int index) throws HiveException {
        int idCount = versionIdListInspector.getListLength(versionIds);
        if (index >= idCount) {
            throw new HiveException("The version id list has " + idCount
                    + " element(s) but the version date at index " + index + " was selected");
        }
        return elementAsString(versionIdListInspector, versionIdInspector, versionIds, index);
    }

    /** Converts one list element to a Java string, mapping a NULL element to {@code null}. */
    private static String elementAsString(ListObjectInspector listInspector,
            StringObjectInspector elementInspector, Object list, int index) {
        Object element = listInspector.getListElement(list, index);
        return element == null ? null : elementInspector.getPrimitiveJavaObject(element);
    }

    /** Casts an argument inspector to a list inspector or reports the offending argument. */
    private static ListObjectInspector requireList(ObjectInspector inspector, int position)
            throws UDFArgumentException {
        if (!(inspector instanceof ListObjectInspector)) {
            throw new UDFArgumentException("Argument " + position
                    + " must be an array of strings, but got " + inspector.getTypeName());
        }
        return (ListObjectInspector) inspector;
    }

    /** Casts an inspector to a string inspector or reports what had the wrong type. */
    private static StringObjectInspector requireString(ObjectInspector inspector, String what)
            throws UDFArgumentException {
        if (!(inspector instanceof StringObjectInspector)) {
            throw new UDFArgumentException(what
                    + " must be of type string, but got " + inspector.getTypeName());
        }
        return (StringObjectInspector) inspector;
    }
}
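// From the article "Handling different data compression formats in a GenericUDF".
// (The scraped page ended with an empty comment-section footer: "Post a comment",
// "All comments", "No one has commented yet".)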