SparkSQL official-documentation Java example: ERROR CodeGenerator: failed to compile

Table of Contents

Problem Code

Locating the Problem

Not Done Yet?

Locating the Problem (Again)

The Correct Code

Summary


Problem Code

SparkSessionJavaTest.java

package sparkSQL.apachedemo;


import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.apache.spark.sql.expressions.Aggregator;
import org.apache.spark.sql.expressions.MutableAggregationBuffer;
import org.apache.spark.sql.expressions.UserDefinedAggregateFunction;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.TypedColumn;



import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import static org.apache.spark.sql.functions.col;

/**
 * @Classname SparkSessionJavaTest
 * @Date 2019/11/14 7:00 PM
 * @Author hadoop
 * @Description:
 * SparkSQL Java version
 */

public class SparkSessionJavaTest {
    public static void main(String[] args){
        Logger.getLogger("org").setLevel(Level.INFO);
        SparkConf conf = new SparkConf()
                .setAppName("SparkSessionJavaTest")
                .setMaster("local[2]");
        SparkSession spark = SparkSession
                .builder()
                .config(conf)
//                .enableHiveSupport()
                .getOrCreate();
        String filePath = "file:/usr/local/spark/examples/src/main/resources/";

        typeSafeUserDefinedAggregateFunction(spark,filePath);
        spark.stop();
    }

    /**
     * Aggregation:
     * type-safe user-defined aggregate function
     * @param spark
     * @param filePath
     */
    private static void typeSafeUserDefinedAggregateFunction(SparkSession spark,String filePath){
        Encoder<Employee> employeeEncoder = Encoders.bean(Employee.class);
        String path = filePath+ "employees.json";
        Dataset<Employee> ds = spark.read().json(path).as(employeeEncoder);
        ds.show();

        MyAverage2 myAverage = new MyAverage2();
        // Convert the function to a `TypedColumn` and give it a name
        TypedColumn<Employee, Double> averageSalary = myAverage.toColumn().name("average_salary");
        Dataset<Double> result = ds.select(averageSalary);
        result.show();

    }

    /**
     * Employee inner class
     */
    public static class Employee implements Serializable{
        private String name;
        private long salary;

        public Employee(String name, long salary) {
            this.name = name;
            this.salary = salary;
        }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public long getSalary() {
            return salary;
        }

        public void setSalary(long salary) {
            this.salary = salary;
        }
    }


    /**
     * Average inner class
     */
    public static class  Average implements Serializable{
        private long sum;
        private long count;

        public Average(long sum, long count) {
            this.sum = sum;
            this.count = count;
        }

        public long getSum() {
            return sum;
        }

        public void setSum(long sum) {
            this.sum = sum;
        }

        public long getCount() {
            return count;
        }

        public void setCount(long count) {
            this.count = count;
        }
    }

    public static class MyAverage2 extends Aggregator<Employee,Average,Double> {
        // A zero value for this aggregation. Should satisfy the property that any b + zero = b
        public Average zero(){
            return new Average(0L,0L);
        }
        // Combine two values to produce a new value. For performance, the function may modify `buffer`
        // and return it instead of constructing a new object
        public Average reduce(Average buffer,Employee employee){
            long newSum = buffer.getSum() + employee.getSalary();
            long newCount = buffer.getCount() + 1;
            buffer.setSum(newSum);
            buffer.setCount(newCount);
            return buffer;
        }
        // Merge two intermediate values
        public Average merge(Average b1,Average b2){
            long mergeSum = b1.getSum() + b2.getSum();
            long mergeCount = b1.getCount() + b2.getCount();
            b1.setSum(mergeSum);
            b1.setCount(mergeCount);
            return b1;
        }
        //Transform the output of the reduction
        public Double finish(Average reduction){
            return ((double)reduction.getSum()) / reduction.getCount();

        }
        //Specifies the Encoder for the intermediate value type
        public Encoder<Average> bufferEncoder(){
            return Encoders.bean(Average.class);
        }
        //Specifies the Encoder for the final output value type
        public Encoder<Double> outputEncoder(){
            return Encoders.DOUBLE();
        }

    }
}

Problem:

While working through the Type-Safe User-Defined Aggregate Functions example in the Spark SQL official documentation, I ran into the following error:

19/11/15 14:26:36 ERROR CodeGenerator: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 37, Column 85: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Employee(java.lang.String, long)"
org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 37, Column 85: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Employee(java.lang.String, long)"
 ....... (omitted)
19/11/15 14:26:36 INFO CodeGenerator: 
/* 001 */ public java.lang.Object generate(Object[] references) {
/* 002 */   return new SpecificSafeProjection(references);
/* 003 */ }
/* 004 */
/* 005 */ class SpecificSafeProjection extends org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
/* 006 */
/* 007 */   private Object[] references;
/* 008 */   private InternalRow mutableRow;
/* 009 */
/* 010 */
/* 011 */   public SpecificSafeProjection(Object[] references) {
/* 012 */     this.references = references;
/* 013 */     mutableRow = (InternalRow) references[references.length - 1];
/* 014 */
/* 015 */   }
/* 016 */
/* 017 */   public void initialize(int partitionIndex) {
/* 018 */
/* 019 */   }
/* 020 */
/* 021 */   public java.lang.Object apply(java.lang.Object _i) {
/* 022 */     InternalRow i = (InternalRow) _i;
/* 023 */
/* 024 */     sparkSQL.apachedemo.SparkSessionJavaTest$Employee value_6 = InitializeJavaBean_0(i);
/* 025 */     if (false) {
/* 026 */       mutableRow.setNullAt(0);
/* 027 */     } else {
/* 028 */
/* 029 */       mutableRow.update(0, value_6);
/* 030 */     }
/* 031 */
/* 032 */     return mutableRow;
/* 033 */   }
/* 034 */
/* 035 */
/* 036 */   private sparkSQL.apachedemo.SparkSessionJavaTest$Employee InitializeJavaBean_0(InternalRow i) {
/* 037 */     final sparkSQL.apachedemo.SparkSessionJavaTest$Employee value_1 = false ? null : new sparkSQL.apachedemo.SparkSessionJavaTest$Employee();
/* 038 */     sparkSQL.apachedemo.SparkSessionJavaTest$Employee javaBean_0 = value_1;
/* 039 */     if (!false) {
/* 040 */
/* 041 */
/* 042 */       boolean isNull_3 = i.isNullAt(0);
/* 043 */       UTF8String value_3 = isNull_3 ? null : (i.getUTF8String(0));
/* 044 */       boolean isNull_2 = true;
/* 045 */       java.lang.String value_2 = null;
/* 046 */       if (!isNull_3) {
/* 047 */
/* 048 */         isNull_2 = false;
/* 049 */         if (!isNull_2) {
/* 050 */
/* 051 */           Object funcResult_0 = null;
/* 052 */           funcResult_0 = value_3.toString();
/* 053 */
/* 054 */           if (funcResult_0 != null) {
/* 055 */             value_2 = (java.lang.String) funcResult_0;
/* 056 */           } else {
/* 057 */             isNull_2 = true;
/* 058 */           }
/* 059 */
/* 060 */
/* 061 */         }
/* 062 */       }
/* 063 */       javaBean_0.setName(value_2);
/* 064 */
/* 065 */
/* 066 */       boolean isNull_5 = i.isNullAt(1);
/* 067 */       long value_5 = isNull_5 ? -1L : (i.getLong(1));
/* 068 */
/* 069 */       if (isNull_5) {
/* 070 */         throw new NullPointerException(((java.lang.String) references[0] /* errMsg */));
/* 071 */       }
/* 072 */       javaBean_0.setSalary(value_5);
/* 073 */
/* 074 */     }
/* 075 */
/* 076 */     return value_1;
/* 077 */   }
/* 078 */
/* 079 */ }

19/11/15 14:26:37 ERROR Executor: Exception in task 0.0 in stage 2.0 (TID 2)
java.util.concurrent.ExecutionException: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 37, Column 85: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 37, Column 85: No applicable constructor/method found for zero actual parameters; candidates are: 

... (omitted)

Locating the Problem

File 'generated.java', Line 37, Column 85: failed to compile

/* 037 */     final sparkSQL.apachedemo.SparkSessionJavaTest$Employee value_1 = false ? null : new sparkSQL.apachedemo.SparkSessionJavaTest$Employee();

As the generated code shows, at runtime Spark calls the no-argument constructor of the Employee class, but the class only defines a constructor with parameters.

    public Employee(String name, long salary) {
        this.name = name;
        this.salary = salary;
    }

Commenting out this constructor, so that the default (no-argument) constructor is used, makes this error go away.
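
The root cause is that Encoders.bean follows the JavaBean convention: the generated code first creates the bean with a no-argument constructor and then fills it through the setters, so the bean class must expose a public no-arg constructor. As a sketch of an alternative fix (the explicit no-arg constructor below is my own addition, not part of the original example), you can keep the convenience constructor as long as you also declare the no-arg one:

    public static class Employee implements Serializable {
        private String name;
        private long salary;

        // Declaring any constructor suppresses the implicit default one,
        // so the no-arg constructor must be written out explicitly.
        public Employee() {
        }

        // The convenience constructor can stay alongside it.
        public Employee(String name, long salary) {
            this.name = name;
            this.salary = salary;
        }

        public String getName() { return name; }
        public void setName(String name) { this.name = name; }
        public long getSalary() { return salary; }
        public void setSalary(long salary) { this.salary = salary; }
    }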

Not Done Yet?

However, the Average class also defines a parameterized constructor. Would it hit the same problem? Checking it is also a good way to verify whether the analysis above is correct.

(Aha, the original exception is gone, but a new one is thrown.)

19/11/15 14:50:06 ERROR CodeGenerator: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 24, Column 84: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Average(long, long)"
org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 24, Column 84: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Average(long, long)"

 ....... (omitted)

19/11/15 14:50:06 INFO CodeGenerator: 
/* 001 */ public java.lang.Object generate(Object[] references) {
/* 002 */   return new SpecificSafeProjection(references);
/* 003 */ }
/* 004 */
/* 005 */ class SpecificSafeProjection extends org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
/* 006 */
/* 007 */   private Object[] references;
/* 008 */   private InternalRow mutableRow;
/* 009 */
/* 010 */
/* 011 */   public SpecificSafeProjection(Object[] references) {
/* 012 */     this.references = references;
/* 013 */     mutableRow = (InternalRow) references[references.length - 1];
/* 014 */
/* 015 */   }
/* 016 */
/* 017 */   public void initialize(int partitionIndex) {
/* 018 */
/* 019 */   }
/* 020 */
/* 021 */   public java.lang.Object apply(java.lang.Object _i) {
/* 022 */     InternalRow i = (InternalRow) _i;
/* 023 */
/* 024 */     final sparkSQL.apachedemo.SparkSessionJavaTest$Average value_1 = false ? null : new sparkSQL.apachedemo.SparkSessionJavaTest$Average();
/* 025 */     sparkSQL.apachedemo.SparkSessionJavaTest$Average javaBean_0 = value_1;
/* 026 */     if (!false) {
/* 027 */
/* 028 */
/* 029 */       long value_3 = i.getLong(0);
/* 030 */
/* 031 */       if (false) {
/* 032 */         throw new NullPointerException(((java.lang.String) references[0] /* errMsg */));
/* 033 */       }
/* 034 */       javaBean_0.setCount(value_3);
/* 035 */
/* 036 */
/* 037 */       long value_5 = i.getLong(1);
/* 038 */
/* 039 */       if (false) {
/* 040 */         throw new NullPointerException(((java.lang.String) references[1] /* errMsg */));
/* 041 */       }
/* 042 */       javaBean_0.setSum(value_5);
/* 043 */
/* 044 */     }
/* 045 */     if (false) {
/* 046 */       mutableRow.setNullAt(0);
/* 047 */     } else {
/* 048 */
/* 049 */       mutableRow.update(0, value_1);
/* 050 */     }
/* 051 */
/* 052 */     return mutableRow;
/* 053 */   }
/* 054 */
/* 055 */
/* 056 */ }

19/11/15 14:50:06 ERROR Executor: Exception in task 0.0 in stage 3.0 (TID 3)
java.util.concurrent.ExecutionException: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 24, Column 84: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 24, Column 84: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Average(long, long)"

... (omitted)

Locating the Problem (Again)

File 'generated.java', Line 24, Column 84: failed to compile

/* 024 */     final sparkSQL.apachedemo.SparkSessionJavaTest$Average value_1 = false ? null : new sparkSQL.apachedemo.SparkSessionJavaTest$Average();

Indeed, the generated code again uses a no-argument constructor, while the class only defines a parameterized one, which is what throws the exception.

Comment out the parameterized constructor and use the default one:

    public Average(long sum, long count) {
        this.sum = sum;
        this.count = count;
    }

Then modify the zero function accordingly:

        public Average zero(){
            Average average = new Average();
            average.setSum(0L);
            average.setCount(0L);
            return average;
        }

With that, the problem is completely resolved.
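
Equivalently for Average, instead of commenting the constructor out you could keep it and add an explicit no-argument constructor, in which case zero() can stay as new Average(0L, 0L). A sketch of that variant (the no-arg constructor is my addition):

    public static class Average implements Serializable {
        private long sum;
        private long count;

        // Needed by Encoders.bean(Average.class): the generated code calls `new Average()`
        public Average() {
        }

        public Average(long sum, long count) {
            this.sum = sum;
            this.count = count;
        }

        public long getSum() { return sum; }
        public void setSum(long sum) { this.sum = sum; }
        public long getCount() { return count; }
        public void setCount(long count) { this.count = count; }
    }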

The Correct Code

SparkSessionJavaTest.java

package sparkSQL.apachedemo;


import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.apache.spark.sql.expressions.Aggregator;
import org.apache.spark.sql.expressions.MutableAggregationBuffer;
import org.apache.spark.sql.expressions.UserDefinedAggregateFunction;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.TypedColumn;



import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import static org.apache.spark.sql.functions.col;

/**
 * @Classname SparkSessionJavaTest
 * @Date 2019/11/14 7:00 PM
 * @Author hadoop
 * @Description:
 * SparkSQL Java version
 */

public class SparkSessionJavaTest {
    public static void main(String[] args){
        Logger.getLogger("org").setLevel(Level.INFO);
        SparkConf conf = new SparkConf()
                .setAppName("SparkSessionJavaTest")
                .setMaster("local[2]");
        SparkSession spark = SparkSession
                .builder()
                .config(conf)
//                .enableHiveSupport()
                .getOrCreate();
        String filePath = "file:/usr/local/spark/examples/src/main/resources/";

        typeSafeUserDefinedAggregateFunction(spark,filePath);
        spark.stop();
    }

    /**
     * Aggregation:
     * type-safe user-defined aggregate function
     * @param spark
     * @param filePath
     */
    private static void typeSafeUserDefinedAggregateFunction(SparkSession spark,String filePath){
        Encoder<Employee> employeeEncoder = Encoders.bean(Employee.class);
        String path = filePath+ "employees.json";
        Dataset<Employee> ds = spark.read().json(path).as(employeeEncoder);
        ds.show();

        MyAverage2 myAverage = new MyAverage2();
        // Convert the function to a `TypedColumn` and give it a name
        TypedColumn<Employee, Double> averageSalary = myAverage.toColumn().name("average_salary");
        Dataset<Double> result = ds.select(averageSalary);
        result.show();

    }

    /**
     * Employee inner class
     */
    public static class Employee implements Serializable{
        private String name;
        private long salary;

        // public Employee(String name, long salary) {
        //     this.name = name;
        //     this.salary = salary;
        // }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public long getSalary() {
            return salary;
        }

        public void setSalary(long salary) {
            this.salary = salary;
        }
    }


    /**
     * Average inner class
     */
    public static class  Average implements Serializable{
        private long sum;
        private long count;

        // public Average(long sum, long count) {
        //     this.sum = sum;
        //     this.count = count;
        // }

        public long getSum() {
            return sum;
        }

        public void setSum(long sum) {
            this.sum = sum;
        }

        public long getCount() {
            return count;
        }

        public void setCount(long count) {
            this.count = count;
        }
    }

    public static class MyAverage2 extends Aggregator<Employee,Average,Double> {
        // A zero value for this aggregation. Should satisfy the property that any b + zero = b
        public Average zero(){
            Average average = new Average();
            average.setSum(0L);
            average.setCount(0L);
            return average;
        }
        // Combine two values to produce a new value. For performance, the function may modify `buffer`
        // and return it instead of constructing a new object
        public Average reduce(Average buffer,Employee employee){
            long newSum = buffer.getSum() + employee.getSalary();
            long newCount = buffer.getCount() + 1;
            buffer.setSum(newSum);
            buffer.setCount(newCount);
            return buffer;
        }
        // Merge two intermediate values
        public Average merge(Average b1,Average b2){
            long mergeSum = b1.getSum() + b2.getSum();
            long mergeCount = b1.getCount() + b2.getCount();
            b1.setSum(mergeSum);
            b1.setCount(mergeCount);
            return b1;
        }
        //Transform the output of the reduction
        public Double finish(Average reduction){
            return ((double)reduction.getSum()) / reduction.getCount();

        }
        //Specifies the Encoder for the intermediate value type
        public Encoder<Average> bufferEncoder(){
            return Encoders.bean(Average.class);
        }
        //Specifies the Encoder for the final output value type
        public Encoder<Double> outputEncoder(){
            return Encoders.DOUBLE();
        }

    }
}
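
For reference, the example reads the employees.json file that ships with Spark under examples/src/main/resources. Assuming the standard four records (Michael/3000, Andy/4500, Justin/3500, Berta/4000), ds.show() lists those four employees and result.show() should print a single average_salary column with the value 3750.0, i.e. (3000 + 4500 + 3500 + 4000) / 4.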

Summary

This was the first time I had run into this kind of problem, and it took some slow exploration to pin it down. Why did I take such a long detour? I realized I had set the log level in the code to Logger.getLogger("org").setLevel(Level.ERROR), so I simply could not see where the real problem was. After changing it to Logger.getLogger("org").setLevel(Level.INFO), the full log (including the generated code) became visible and the problem was easy to locate. Something to pay more attention to in the future.
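
For completeness, these are the two log-level settings mentioned above; only the verbose one is left active while debugging:

        // Verbose: INFO output includes the generated code and the exact failing line
        Logger.getLogger("org").setLevel(Level.INFO);
        // Quiet: hides the CodeGenerator dump that pinpoints the problem
        // Logger.getLogger("org").setLevel(Level.ERROR);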
