Java UDF在Spark SQL中重复返回相同的值

时间:2019-01-02 07:53:30

标签: apache-spark pyspark apache-spark-sql

我编写了一个Spark Java UDF,用于按需生成随机数。在Eclipse中运行时,下面的代码每次调用都会返回新的值;但是,当我在Spark SQL中调用此Java UDF时,每次返回的值都相同。

在Spark中注册UDF: spark.udf.registerJavaFunction("getGeneratedRand","com.test.RandNumGenerator",StringType())

用于随机数生成的UDF代码

   import java.util.Calendar;
    import java.util.Random;
    import org.apache.spark.api.java.*;
    import org.apache.spark.SparkConf;
    import org.apache.spark.sql.*;
    import org.apache.spark.sql.api.java.UDF0;
    import org.apache.spark.sql.api.java.UDF1;
    import org.apache.spark.sql.types.DataTypes;
    import org.apache.commons.lang3.RandomStringUtils;

    /**
     * Zero-argument Spark SQL UDF that builds an identifier of the form
     * {@code NEW_<12 random digits><ss><SSS>}, where the random digits are drawn
     * from {@link #characters}, {@code ss} is the current second (two digits) and
     * {@code SSS} the current millisecond (three digits).
     *
     * <p>A single {@link Random} is shared by every instance in the JVM and is
     * created lazily on first use.
     *
     * <p>NOTE(review): every invocation of {@link #call()} draws fresh digits, so
     * an identical value repeated across rows/columns in Spark SQL is not caused
     * by this class. It is presumably Spark treating the UDF as deterministic and
     * evaluating the zero-argument call only once -- confirm, and if so register
     * the function as non-deterministic on the Spark side.
     */
    public class RandNumGenerator implements UDF0<String>
    {
        /** Literal prefix of every generated value. */
        private static final String PREFIX = "NEW_";

        /** Number of random digits placed after the prefix. */
        private static final int RANDOM_LENGTH = 12;

        /** Alphabet the random digits are drawn from (1-9, zero excluded). */
        static char[] characters = defaultCharacters();

        /** Seed of the shared generator; written once, when the generator is created. */
        static Long randomSeed = null;

        /**
         * Shared generator. Declared {@code volatile} so that the unsynchronised
         * first check in {@link #getRandom()} can never observe a partially
         * published instance.
         */
        static volatile Random random = null;

        /** Returns a fresh copy of the default alphabet. */
        private static char[] defaultCharacters() {
            return new char[] {'1','2','3','4','5','6','7','8','9'};
        }

        /**
         * Returns the JVM-wide generator, creating it on first use.
         *
         * @return the shared, never-null {@link Random}
         */
        private Random getRandom() {
            Random current = random;
            if (current == null) {
                synchronized (RandNumGenerator.class) {
                    current = random;
                    if (current == null) {
                        // Mix in nanoTime so that JVMs started within the same
                        // millisecond do not end up with the same sequence.
                        randomSeed = System.currentTimeMillis() ^ System.nanoTime();
                        current = new Random(randomSeed);
                        random = current;
                    }
                }
            }
            return current;
        }

        /**
         * Left-pads the decimal form of {@code value} with zeros.
         *
         * @param value non-negative number to render
         * @param width minimum number of characters in the result
         * @return {@code value} as decimal text, at least {@code width} characters long
         */
        private static String zeroPad(int value, int width) {
            StringBuilder padded = new StringBuilder(Integer.toString(value));
            while (padded.length() < width) {
                padded.insert(0, '0');
            }
            return padded.toString();
        }

        /**
         * Generates the next identifier.
         *
         * @return {@code "NEW_"} followed by 12 random digits, the two-digit
         *         current second and the three-digit current millisecond
         * @throws Exception declared by {@link UDF0#call()}; not thrown explicitly here
         */
        @Override
        public String call() throws Exception {
            char[] alphabet = characters;
            if (alphabet == null) {
                // The field is package-visible and may have been cleared; restore it.
                alphabet = defaultCharacters();
                characters = alphabet;
            }

            // start = end = 0 with an explicit alphabet: the digits are picked
            // from the alphabet only.
            String randomDigits = RandomStringUtils.random(
                    RANDOM_LENGTH, 0, 0, false, true, alphabet, getRandom());

            Calendar now = Calendar.getInstance();
            String second = zeroPad(now.get(Calendar.SECOND), 2);
            String millisecond = zeroPad(now.get(Calendar.MILLISECOND), 3);

            return PREFIX + randomDigits + second + millisecond;
        }

        /**
         * Manual smoke test: invokes the UDF twice and prints the second result.
         *
         * @param args ignored
         * @throws Exception propagated from {@link #call()}
         */
        public static void main(String[] args) throws Exception {
            RandNumGenerator generator = new RandNumGenerator();
            generator.call();
            String res = generator.call();
            System.out.print(res);
        }
    }

在Spark SQL中调用UDF

spark.sql("select getGeneratedRand(),getGeneratedRand() from db.test_tbl").show(20,False)

结果:

+---------------------------------+---------------------------------+
|UDF:getGeneratedRand()           |UDF:getGeneratedRand()           |
+---------------------------------+---------------------------------+
|NEW_26481847455148826            |NEW_26481847455148826            |
|NEW_26481847455148826            |NEW_26481847455148826            |
|NEW_26481847455148826            |NEW_26481847455148826            |
|NEW_26481847455148826            |NEW_26481847455148826            |
|NEW_26481847455148826            |NEW_26481847455148826            |
|NEW_26481847455148826            |NEW_26481847455148826            |
|NEW_26481847455148826            |NEW_26481847455148826            |

0 个答案:

没有答案