如何为自定义Java对象创建编码器?

时间:2016-08-28 06:15:11

标签: java apache-spark spark-java apache-spark-2.0

我正在使用以下类从Spark编码器创建bean

Class OuterClass implements Serializable {
    int id;
    ArrayList<InnerClass> listofInner;

    public int getId() {
        return id;
    }

    public void setId (int num) {
        this.id = num;
    }

    public ArrayList<InnerClass> getListofInner() {
        return listofInner;
    }

    public void setListofInner(ArrayList<InnerClass> list) {
        this.listofInner = list;
    }
}

public static class InnerClass implements Serializable {
    String streetno;

    public void setStreetno(String streetno) {
        this.streetno= streetno;
    }

    public String getStreetno() {
        return streetno;
    }
}

Encoder<OuterClass> outerClassEncoder = Encoders.bean(OuterClass.class);
Dataset<OuterClass> ds = spark.createDataset(Collections.singeltonList(outerclassList), outerClassEncoder)

我收到以下错误

Exception in thread "main" java.lang.UnsupportedOperationException: Cannot infer type for class OuterClass$InnerClass because it is not bean-compliant

如何在java中为spark实现这种类型的用例?如果我删除内部类,这工作正常。但我需要为我的用例设一个内部类。

1 个答案:

答案 0 :(得分:5)

您的JavaBean类应该有一个公共的无参数构造函数,getter和setter,它应该实现Serializable接口。 Spark SQL适用于有效的JavaBean类。

编辑:添加内部类的工作示例

<强> OuterInnerDF.java

package com.abaghel.examples;

import java.util.ArrayList;
import java.util.Collections;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import com.abaghel.examples.OuterClass.InnerClass;

public class OuterInnerDF {
  public static void main(String[] args) {
    SparkSession spark = SparkSession
            .builder()
            .appName("OuterInnerDF")
            .config("spark.sql.warehouse.dir", "/file:C:/temp")
            .master("local[2]")
            .getOrCreate();

     System.out.println("====> Create DataFrame");
     //Outer
     OuterClass us = new OuterClass();
     us.setId(111);     
     //Inner
     OuterClass.InnerClass ic = new OuterClass.InnerClass();
     ic.setStreetno("My Street");
     //list
     ArrayList<InnerClass> ar = new ArrayList<InnerClass>();
     ar.add(ic);         
     us.setListofInner(ar);  
     //DF
     Encoder<OuterClass> outerClassEncoder = Encoders.bean(OuterClass.class);        
     Dataset<OuterClass> ds = spark.createDataset(Collections.singletonList(us), outerClassEncoder);
     ds.show();
    }
}

<强> OuterClass.java

package com.abaghel.examples;

import java.io.Serializable;
import java.util.ArrayList;

public class OuterClass implements Serializable {
int id;
ArrayList<InnerClass> listofInner;

public int getId() {
    return id;
}

public void setId(int num) {
    this.id = num;
}

public ArrayList<InnerClass> getListofInner() {
    return listofInner;
}

public void setListofInner(ArrayList<InnerClass> list) {
    this.listofInner = list;
}

public static class InnerClass implements Serializable {
    String streetno;

    public void setStreetno(String streetno) {
        this.streetno = streetno;
    }

    public String getStreetno() {
        return streetno;
      }
    }
}

控制台输出

====> Create DataFrame
16/08/28 18:02:55 INFO CodeGenerator: Code generated in 32.516369 ms
+---+-------------+
| id|  listofInner|
+---+-------------+
|111|[[My Street]]|
+---+-------------+