SparkSQL official-documentation Java example: ERROR CodeGenerator: failed to compile

Table of Contents

Problem Code

Locating the Problem

Not Done Yet?

Locating the Problem (Again)

The Correct Code

Summary


Problem Code

SparkSessionJavaTest.java

package sparkSQL.apachedemo;


import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.apache.spark.sql.expressions.Aggregator;
import org.apache.spark.sql.expressions.MutableAggregationBuffer;
import org.apache.spark.sql.expressions.UserDefinedAggregateFunction;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.TypedColumn;



import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import static org.apache.spark.sql.functions.col;

/**
 * @Classname SparkSessionJavaTest
 * @Date 2019/11/14 7:00 PM
 * @Author hadoop
 * @Description:
 * SparkSQL Java version
 */

public class SparkSessionJavaTest {
    public static void main(String[] args){
        Logger.getLogger("org").setLevel(Level.INFO);
        SparkConf conf = new SparkConf()
                .setAppName("SparkSessionJavaTest")
                .setMaster("local[2]");
        SparkSession spark = SparkSession
                .builder()
                .config(conf)
//                .enableHiveSupport()
                .getOrCreate();
        String filePath = "file:/usr/local/spark/examples/src/main/resources/";

        typeSafeUserDefinedAggregateFunction(spark,filePath);
        spark.stop();
    }

    /**
     * Aggregation:
     * type-safe user-defined aggregate function
     * @param spark
     * @param filePath
     */
    private static void typeSafeUserDefinedAggregateFunction(SparkSession spark,String filePath){
        Encoder<Employee> employeeEncoder = Encoders.bean(Employee.class);
        String path = filePath+ "employees.json";
        Dataset<Employee> ds = spark.read().json(path).as(employeeEncoder);
        ds.show();

        MyAverage2 myAverage = new MyAverage2();
        // Convert the function to a `TypedColumn` and give it a name
        TypedColumn<Employee, Double> averageSalary = myAverage.toColumn().name("average_salary");
        Dataset<Double> result = ds.select(averageSalary);
        result.show();

    }

    /**
     * Employee inner class
     */
    public static class Employee implements Serializable{
        private String name;
        private long salary;

        public Employee(String name, long salary) {
            this.name = name;
            this.salary = salary;
        }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public long getSalary() {
            return salary;
        }

        public void setSalary(long salary) {
            this.salary = salary;
        }
    }


    /**
     * Average inner class
     */
    public static class  Average implements Serializable{
        private long sum;
        private long count;

        public Average(long sum, long count) {
            this.sum = sum;
            this.count = count;
        }

        public long getSum() {
            return sum;
        }

        public void setSum(long sum) {
            this.sum = sum;
        }

        public long getCount() {
            return count;
        }

        public void setCount(long count) {
            this.count = count;
        }
    }

    public static class MyAverage2 extends Aggregator<Employee,Average,Double> {
        // A zero value for this aggregation. Should satisfy the property that any b + zero = b
        public Average zero(){
            return new Average(0L,0L);
        }
        // Combine two values to produce a new value. For performance, the function may modify `buffer`
        // and return it instead of constructing a new object
        public Average reduce(Average buffer,Employee employee){
            long newSum = buffer.getSum() + employee.getSalary();
            long newCount = buffer.getCount() + 1;
            buffer.setSum(newSum);
            buffer.setCount(newCount);
            return buffer;
        }
        // Merge two intermediate values
        public Average merge(Average b1,Average b2){
            long mergeSum = b1.getSum() + b2.getSum();
            long mergeCount = b1.getCount() + b2.getCount();
            b1.setSum(mergeSum);
            b1.setCount(mergeCount);
            return b1;
        }
        //Transform the output of the reduction
        public Double finish(Average reduction){
            return ((double)reduction.getSum()) / reduction.getCount();

        }
        //Specifies the Encoder for the intermediate value type
        public Encoder<Average> bufferEncoder(){
            return Encoders.bean(Average.class);
        }
        //Specifies the Encoder for the final output value type
        public Encoder<Double> outputEncoder(){
            return Encoders.DOUBLE();
        }

    }
}

Problem:

While working through the Type-Safe User-Defined Aggregate Functions example in the Spark SQL official documentation, I ran into the following error:

19/11/15 14:26:36 ERROR CodeGenerator: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 37, Column 85: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Employee(java.lang.String, long)"
org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 37, Column 85: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Employee(java.lang.String, long)"
 ....... (omitted)
19/11/15 14:26:36 INFO CodeGenerator: 
/* 001 */ public java.lang.Object generate(Object[] references) {
/* 002 */   return new SpecificSafeProjection(references);
/* 003 */ }
/* 004 */
/* 005 */ class SpecificSafeProjection extends org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
/* 006 */
/* 007 */   private Object[] references;
/* 008 */   private InternalRow mutableRow;
/* 009 */
/* 010 */
/* 011 */   public SpecificSafeProjection(Object[] references) {
/* 012 */     this.references = references;
/* 013 */     mutableRow = (InternalRow) references[references.length - 1];
/* 014 */
/* 015 */   }
/* 016 */
/* 017 */   public void initialize(int partitionIndex) {
/* 018 */
/* 019 */   }
/* 020 */
/* 021 */   public java.lang.Object apply(java.lang.Object _i) {
/* 022 */     InternalRow i = (InternalRow) _i;
/* 023 */
/* 024 */     sparkSQL.apachedemo.SparkSessionJavaTest$Employee value_6 = InitializeJavaBean_0(i);
/* 025 */     if (false) {
/* 026 */       mutableRow.setNullAt(0);
/* 027 */     } else {
/* 028 */
/* 029 */       mutableRow.update(0, value_6);
/* 030 */     }
/* 031 */
/* 032 */     return mutableRow;
/* 033 */   }
/* 034 */
/* 035 */
/* 036 */   private sparkSQL.apachedemo.SparkSessionJavaTest$Employee InitializeJavaBean_0(InternalRow i) {
/* 037 */     final sparkSQL.apachedemo.SparkSessionJavaTest$Employee value_1 = false ? null : new sparkSQL.apachedemo.SparkSessionJavaTest$Employee();
/* 038 */     sparkSQL.apachedemo.SparkSessionJavaTest$Employee javaBean_0 = value_1;
/* 039 */     if (!false) {
/* 040 */
/* 041 */
/* 042 */       boolean isNull_3 = i.isNullAt(0);
/* 043 */       UTF8String value_3 = isNull_3 ? null : (i.getUTF8String(0));
/* 044 */       boolean isNull_2 = true;
/* 045 */       java.lang.String value_2 = null;
/* 046 */       if (!isNull_3) {
/* 047 */
/* 048 */         isNull_2 = false;
/* 049 */         if (!isNull_2) {
/* 050 */
/* 051 */           Object funcResult_0 = null;
/* 052 */           funcResult_0 = value_3.toString();
/* 053 */
/* 054 */           if (funcResult_0 != null) {
/* 055 */             value_2 = (java.lang.String) funcResult_0;
/* 056 */           } else {
/* 057 */             isNull_2 = true;
/* 058 */           }
/* 059 */
/* 060 */
/* 061 */         }
/* 062 */       }
/* 063 */       javaBean_0.setName(value_2);
/* 064 */
/* 065 */
/* 066 */       boolean isNull_5 = i.isNullAt(1);
/* 067 */       long value_5 = isNull_5 ? -1L : (i.getLong(1));
/* 068 */
/* 069 */       if (isNull_5) {
/* 070 */         throw new NullPointerException(((java.lang.String) references[0] /* errMsg */));
/* 071 */       }
/* 072 */       javaBean_0.setSalary(value_5);
/* 073 */
/* 074 */     }
/* 075 */
/* 076 */     return value_1;
/* 077 */   }
/* 078 */
/* 079 */ }

19/11/15 14:26:37 ERROR Executor: Exception in task 0.0 in stage 2.0 (TID 2)
java.util.concurrent.ExecutionException: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 37, Column 85: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 37, Column 85: No applicable constructor/method found for zero actual parameters; candidates are: 

... (omitted)

Locating the Problem

File 'generated.java', Line 37, Column 85: failed to compile

/* 037 */     final sparkSQL.apachedemo.SparkSessionJavaTest$Employee value_1 = false ? null : new sparkSQL.apachedemo.SparkSessionJavaTest$Employee();

As the generated code shows, at runtime Spark calls the no-argument constructor of the Employee class, but the class only defines a constructor with parameters.

    public Employee(String name, long salary) {
        this.name = name;
        this.salary = salary;
    }

Commenting out this constructor, so that the default (no-argument) constructor is used, makes this error go away.
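
The root cause is that Encoders.bean follows the JavaBean convention: the generated code first creates the bean with a no-argument constructor and then fills it through the setters, so the bean class must expose a public no-arg constructor. As a sketch of an alternative fix (the explicit no-arg constructor below is my own addition, not part of the original example), you can keep the convenience constructor as long as you also declare the no-arg one:

    public static class Employee implements Serializable {
        private String name;
        private long salary;

        // Declaring any constructor suppresses the implicit default one,
        // so the no-arg constructor must be written out explicitly.
        public Employee() {
        }

        // The convenience constructor can stay alongside it.
        public Employee(String name, long salary) {
            this.name = name;
            this.salary = salary;
        }

        public String getName() { return name; }
        public void setName(String name) { this.name = name; }
        public long getSalary() { return salary; }
        public void setSalary(long salary) { this.salary = salary; }
    }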

Not Done Yet?

However, the Average class also defines a parameterized constructor. Would it hit the same problem? Checking it is also a good way to verify whether the analysis above is correct.

(Aha, the original exception is gone, but a new one is thrown.)

19/11/15 14:50:06 ERROR CodeGenerator: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 24, Column 84: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Average(long, long)"
org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 24, Column 84: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Average(long, long)"

 ....... (omitted)

19/11/15 14:50:06 INFO CodeGenerator: 
/* 001 */ public java.lang.Object generate(Object[] references) {
/* 002 */   return new SpecificSafeProjection(references);
/* 003 */ }
/* 004 */
/* 005 */ class SpecificSafeProjection extends org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
/* 006 */
/* 007 */   private Object[] references;
/* 008 */   private InternalRow mutableRow;
/* 009 */
/* 010 */
/* 011 */   public SpecificSafeProjection(Object[] references) {
/* 012 */     this.references = references;
/* 013 */     mutableRow = (InternalRow) references[references.length - 1];
/* 014 */
/* 015 */   }
/* 016 */
/* 017 */   public void initialize(int partitionIndex) {
/* 018 */
/* 019 */   }
/* 020 */
/* 021 */   public java.lang.Object apply(java.lang.Object _i) {
/* 022 */     InternalRow i = (InternalRow) _i;
/* 023 */
/* 024 */     final sparkSQL.apachedemo.SparkSessionJavaTest$Average value_1 = false ? null : new sparkSQL.apachedemo.SparkSessionJavaTest$Average();
/* 025 */     sparkSQL.apachedemo.SparkSessionJavaTest$Average javaBean_0 = value_1;
/* 026 */     if (!false) {
/* 027 */
/* 028 */
/* 029 */       long value_3 = i.getLong(0);
/* 030 */
/* 031 */       if (false) {
/* 032 */         throw new NullPointerException(((java.lang.String) references[0] /* errMsg */));
/* 033 */       }
/* 034 */       javaBean_0.setCount(value_3);
/* 035 */
/* 036 */
/* 037 */       long value_5 = i.getLong(1);
/* 038 */
/* 039 */       if (false) {
/* 040 */         throw new NullPointerException(((java.lang.String) references[1] /* errMsg */));
/* 041 */       }
/* 042 */       javaBean_0.setSum(value_5);
/* 043 */
/* 044 */     }
/* 045 */     if (false) {
/* 046 */       mutableRow.setNullAt(0);
/* 047 */     } else {
/* 048 */
/* 049 */       mutableRow.update(0, value_1);
/* 050 */     }
/* 051 */
/* 052 */     return mutableRow;
/* 053 */   }
/* 054 */
/* 055 */
/* 056 */ }

19/11/15 14:50:06 ERROR Executor: Exception in task 0.0 in stage 3.0 (TID 3)
java.util.concurrent.ExecutionException: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 24, Column 84: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 24, Column 84: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Average(long, long)"

... (omitted)

Locating the Problem (Again)

File 'generated.java', Line 24, Column 84: failed to compile

/* 024 */     final sparkSQL.apachedemo.SparkSessionJavaTest$Average value_1 = false ? null : new sparkSQL.apachedemo.SparkSessionJavaTest$Average();

Indeed, the generated code again uses a no-argument constructor, while the class only defines a parameterized one, which is what throws the exception.

Comment out the parameterized constructor and use the default one:

    public Average(long sum, long count) {
        this.sum = sum;
        this.count = count;
    }

Then modify the zero function accordingly:

        public Average zero(){
            Average average = new Average();
            average.setSum(0L);
            average.setCount(0L);
            return average;
        }

With that, the problem is completely resolved.
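
Equivalently for Average, instead of commenting the constructor out you could keep it and add an explicit no-argument constructor, in which case zero() can stay as new Average(0L, 0L). A sketch of that variant (the no-arg constructor is my addition):

    public static class Average implements Serializable {
        private long sum;
        private long count;

        // Needed by Encoders.bean(Average.class): the generated code calls `new Average()`
        public Average() {
        }

        public Average(long sum, long count) {
            this.sum = sum;
            this.count = count;
        }

        public long getSum() { return sum; }
        public void setSum(long sum) { this.sum = sum; }
        public long getCount() { return count; }
        public void setCount(long count) { this.count = count; }
    }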

The Correct Code

SparkSessionJavaTest.java

package sparkSQL.apachedemo;


import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.apache.spark.sql.expressions.Aggregator;
import org.apache.spark.sql.expressions.MutableAggregationBuffer;
import org.apache.spark.sql.expressions.UserDefinedAggregateFunction;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.TypedColumn;



import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import static org.apache.spark.sql.functions.col;

/**
 * @Classname SparkSessionJavaTest
 * @Date 2019/11/14 7:00 PM
 * @Author hadoop
 * @Description:
 * SparkSQL Java version
 */

public class SparkSessionJavaTest {
    public static void main(String[] args){
        Logger.getLogger("org").setLevel(Level.INFO);
        SparkConf conf = new SparkConf()
                .setAppName("SparkSessionJavaTest")
                .setMaster("local[2]");
        SparkSession spark = SparkSession
                .builder()
                .config(conf)
//                .enableHiveSupport()
                .getOrCreate();
        String filePath = "file:/usr/local/spark/examples/src/main/resources/";

        typeSafeUserDefinedAggregateFunction(spark,filePath);
        spark.stop();
    }

    /**
     * Aggregation:
     * type-safe user-defined aggregate function
     * @param spark
     * @param filePath
     */
    private static void typeSafeUserDefinedAggregateFunction(SparkSession spark,String filePath){
        Encoder<Employee> employeeEncoder = Encoders.bean(Employee.class);
        String path = filePath+ "employees.json";
        Dataset<Employee> ds = spark.read().json(path).as(employeeEncoder);
        ds.show();

        MyAverage2 myAverage = new MyAverage2();
        // Convert the function to a `TypedColumn` and give it a name
        TypedColumn<Employee, Double> averageSalary = myAverage.toColumn().name("average_salary");
        Dataset<Double> result = ds.select(averageSalary);
        result.show();

    }

    /**
     * Employee inner class
     */
    public static class Employee implements Serializable{
        private String name;
        private long salary;

        // public Employee(String name, long salary) {
        //     this.name = name;
        //     this.salary = salary;
        // }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public long getSalary() {
            return salary;
        }

        public void setSalary(long salary) {
            this.salary = salary;
        }
    }


    /**
     * Average inner class
     */
    public static class  Average implements Serializable{
        private long sum;
        private long count;

        // public Average(long sum, long count) {
        //     this.sum = sum;
        //     this.count = count;
        // }

        public long getSum() {
            return sum;
        }

        public void setSum(long sum) {
            this.sum = sum;
        }

        public long getCount() {
            return count;
        }

        public void setCount(long count) {
            this.count = count;
        }
    }

    public static class MyAverage2 extends Aggregator<Employee,Average,Double> {
        // A zero value for this aggregation. Should satisfy the property that any b + zero = b
        public Average zero(){
            Average average = new Average();
            average.setSum(0L);
            average.setCount(0L);
            return average;
        }
        // Combine two values to produce a new value. For performance, the function may modify `buffer`
        // and return it instead of constructing a new object
        public Average reduce(Average buffer,Employee employee){
            long newSum = buffer.getSum() + employee.getSalary();
            long newCount = buffer.getCount() + 1;
            buffer.setSum(newSum);
            buffer.setCount(newCount);
            return buffer;
        }
        // Merge two intermediate values
        public Average merge(Average b1,Average b2){
            long mergeSum = b1.getSum() + b2.getSum();
            long mergeCount = b1.getCount() + b2.getCount();
            b1.setSum(mergeSum);
            b1.setCount(mergeCount);
            return b1;
        }
        //Transform the output of the reduction
        public Double finish(Average reduction){
            return ((double)reduction.getSum()) / reduction.getCount();

        }
        //Specifies the Encoder for the intermediate value type
        public Encoder<Average> bufferEncoder(){
            return Encoders.bean(Average.class);
        }
        //Specifies the Encoder for the final output value type
        public Encoder<Double> outputEncoder(){
            return Encoders.DOUBLE();
        }

    }
}
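
For reference, the example reads the employees.json file that ships with Spark under examples/src/main/resources. Assuming the standard four records (Michael/3000, Andy/4500, Justin/3500, Berta/4000), ds.show() lists those four employees and result.show() should print a single average_salary column with the value 3750.0, i.e. (3000 + 4500 + 3500 + 4000) / 4.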

Summary

This was the first time I had run into this kind of problem, and it took some slow exploration to pin it down. Why did I take such a long detour? I realized I had set the log level in the code to Logger.getLogger("org").setLevel(Level.ERROR), so I simply could not see where the real problem was. After changing it to Logger.getLogger("org").setLevel(Level.INFO), the full log (including the generated code) became visible and the problem was easy to locate. Something to pay more attention to in the future.
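
For completeness, these are the two log-level settings mentioned above; only the verbose one is left active while debugging:

        // Verbose: INFO output includes the generated code and the exact failing line
        Logger.getLogger("org").setLevel(Level.INFO);
        // Quiet: hides the CodeGenerator dump that pinpoints the problem
        // Logger.getLogger("org").setLevel(Level.ERROR);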
