運行代碼
// Build a one-row pipeline and run a Beam SQL query that calls a registered UDF.
PipelineOptions options = PipelineOptionsFactory.create();
// Explicitly select the PipelineRunner: DirectRunner (local mode).
options.setRunner(DirectRunner.class);
Pipeline pipeline = Pipeline.create(options);
// The query calls the function registered below under the SQL name "cubic2" on the "name" column.
String sql2="select cubic2(name) as num from PCOLLECTION";
// Schema with a single string field, "name".
Schema beamSchema = Schema.builder().addStringField("name").build();
String name="lisi";
// One input row holding the value of `name`.
Row row = Row.withSchema(beamSchema).addValue(name).build();
PCollection<Row> p1 = pipeline.apply(Create.of(row).withRowSchema(beamSchema));
// The tuple tag name "PCOLLECTION" matches the table name used in the query's FROM clause.
// NOTE(review): p2 is not read again in this snippet.
PCollection<Row> p2 =PCollectionTuple.of(new TupleTag<>("PCOLLECTION"), p1)
.apply(
"testUdf2", SqlTransform.query(sql2).registerUdf("cubic2", new CubicIntegerFn()));
pipeline.run();
}
/** Function that returns its string argument with the literal suffix "ccccc" appended. */
public static class CubicIntegerFn implements SerializableFunction<String, String> {
    /**
     * Appends "ccccc" to the given value.
     *
     * @param input the value to extend
     * @return {@code input} followed by "ccccc"
     */
    @Override
    public String apply(String input) {
        // StringBuilder.append(String) renders a null argument as "null",
        // exactly as the + operator does, so null handling is unchanged.
        StringBuilder suffixed = new StringBuilder();
        suffixed.append(input);
        suffixed.append("ccccc");
        return suffixed.toString();
    }
}
錯誤
Exception in thread "main" java.util.ServiceConfigurationError: org.apache.beam.sdk.extensions.sql.impl.udf.BeamBuiltinFunctionProvider: Provider org.apache.beam.sdk.extensions.sql.impl.udf.BuiltinStringFunctions could not be instantiated
at java.util.ServiceLoader.fail(ServiceLoader.java:232)
at java.util.ServiceLoader.access$100(ServiceLoader.java:185)
at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:384)
at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:404)
at java.util.ServiceLoader$1.next(ServiceLoader.java:480)
at org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv.loadBeamBuiltinFunctions(BeamSqlEnv.java:128)
at org.apache.beam.sdk.extensions.sql.SqlTransform.expand(SqlTransform.java:94)
at org.apache.beam.sdk.extensions.sql.SqlTransform.expand(SqlTransform.java:76)
at org.apache.beam.sdk.Pipeline.applyInternal(Pipeline.java:537)
at org.apache.beam.sdk.Pipeline.applyTransform(Pipeline.java:488)
at org.apache.beam.sdk.values.PCollectionTuple.apply(PCollectionTuple.java:167)
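解決:
BuiltinStringFunctions 依賴 commons-codec(org.apache.commons.codec);classpath 中缺少該套件時,ServiceLoader 無法實例化它,因而拋出上述 ServiceConfigurationError。
在 pom.xml 中添加以下依賴: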
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.13</version>
</dependency>
效果: