无法使用callUDF()调用UDF - Spark Java

时间:2017-08-02 13:58:55

标签: java apache-spark apache-spark-sql user-defined-functions

我试图在注册后使用callUDF调用udf。但是,函数validateNumber()没有被调用。

代码如下所示:

/**
 * Registers the {@code validateNumber} UDF on the session obtained from {@code CONFIG} and
 * returns {@code dataset} with a {@code rejection_reason} column computed from the
 * {@code cookie} and {@code session} columns.
 *
 * <p>This method only attaches the column expression to the returned Dataset; it does not
 * itself call any action such as {@code show()}.
 *
 * @param dataset input rows; expected to expose {@code cookie} and {@code session} columns
 * @return the dataset with the {@code rejection_reason} column set
 */
public Dataset<Row> sampleCallUdf(Dataset<Row> dataset) {
    // Register the static method under the name used by callUDF below.
    CONFIG.getSparkSession()
            .udf()
            .register(
                    "validateNumber",
                    (UDF2<Long, Long, String>) SampleClass::validateNumber,
                    DataTypes.StringType);

    return dataset.withColumn(
            "rejection_reason",
            coalesce(callUDF("validateNumber", column("cookie"), column("session"))));
}

    /**
     * Classifies a cookie value as {@code "correct"} or {@code "incorrect"}.
     *
     * @param cookie  value to check; may be {@code null}
     * @param session not used by the check; present so the method has the two-argument UDF shape
     * @return {@code "correct"} when {@code cookie} is non-null and non-zero,
     *         otherwise {@code "incorrect"}
     */
    public static String validateNumber(Long cookie, Long session) {
        System.out.println("Into validateNumber function");
        // Guard against null first: comparing a boxed Long with a primitive auto-unboxes it
        // and would throw NullPointerException for a null cookie.
        if (cookie != null && cookie != 0L) {
            return "correct";
        }
        return "incorrect";
    }

我正在尝试的输入是:

 // Two sample rows that differ in their second and third values (0L vs 2345678L, and the sign
 // of the large long). NOTE(review): the column names come from TEMP_TABLE, which is not
 // shown here -- presumably these are the cookie and session columns; confirm against it.
 Row firstRow = RowFactory.create(
         "28/05/2017 00:12:34", 0L, -2864001245604480000L, "abc", "90.202.190.106", 123, "abc", "xyz", "mno");
 Row secondRow = RowFactory.create(
         "28/05/2017 00:12:34", 2345678L, 2864001245604480000L, "abc", "90.202.190.106", 123, "abc", "xyz", "mno");

 Dataset<Row> input = spark().createDataFrame(Arrays.asList(firstRow, secondRow), TEMP_TABLE);

问题是,validateNumber()函数中的System.out.println语句甚至都没有被打印出来。

1 个答案:

答案 0 :(得分:0)

请参考下面的示例程序。注意:Spark是惰性求值的,withColumn只是把UDF调用加入到返回的Dataset的执行计划中;只有在执行action(例如下面的ds1.show())时,UDF才会真正被调用。

/**
 * Minimal runnable example that registers a two-argument UDF and applies it to a Dataset
 * through {@code callUDF}.
 */
public class SparkUDF {

    /**
     * Builds a two-row Dataset with {@code cookie} and {@code session} columns, adds a
     * {@code rejection_reason} column computed by the {@code validateNumber} UDF, and prints
     * the result.
     *
     * @param args command-line arguments; not used
     * @throws Exception declared for compatibility with the original signature
     */
    public static void main(String[] args) throws Exception {
        SparkSession spark = SparkSession
                .builder()
                .appName("SparkUDF")
                .master("local[*]")
                .getOrCreate();
        try {
            // Sample data: one non-zero cookie and one zero cookie.
            List<Tuple2<Long, Long>> inputList = new ArrayList<>();
            inputList.add(new Tuple2<Long, Long>(111L, 10011L));
            inputList.add(new Tuple2<Long, Long>(0L, 20022L));

            // Dataset with named columns so the UDF arguments can be referenced by name.
            Dataset<Row> ds = spark
                    .createDataset(inputList, Encoders.tuple(Encoders.LONG(), Encoders.LONG()))
                    .toDF("cookie", "session");

            // Register the static method under the name used by callUDF below.
            UDF2<Long, Long, String> validateNumber = (UDF2<Long, Long, String>) SparkUDF::validateNumber;
            spark.udf().register("validateNumber", validateNumber, DataTypes.StringType);

            // The single-argument coalesce is kept to mirror the expression from the question.
            Dataset<Row> ds1 = ds.withColumn(
                    "rejection_reason",
                    coalesce(callUDF("validateNumber", col("cookie"), col("session"))));

            // show() is the action that actually runs the plan, and with it the UDF.
            ds1.show();
        } finally {
            // Stop the session even when building or showing the Dataset fails.
            spark.stop();
        }
    }

    /**
     * Classifies a cookie value as {@code "correct"} or {@code "incorrect"}.
     *
     * @param cookie  value to check; may be {@code null}
     * @param session not used by the check; present so the method has the two-argument UDF shape
     * @return {@code "correct"} when {@code cookie} is non-null and non-zero,
     *         otherwise {@code "incorrect"}
     */
    public static String validateNumber(Long cookie, Long session) {
        // Guard against null first: comparing a boxed Long with a primitive auto-unboxes it
        // and would throw NullPointerException for a null cookie.
        if (cookie != null && cookie != 0L) {
            return "correct";
        }
        return "incorrect";
    }
}

您将获得如下输出。

+------+-------+----------------+
|cookie|session|rejection_reason|
+------+-------+----------------+
|   111|  10011|         correct|
|     0|  20022|       incorrect|
+------+-------+----------------+