我编写了一个 Spark Java UDF,用于按需生成随机数。在 Eclipse 中运行时,下面的代码每次都会返回新的值;但是,当我在 Spark SQL 中调用这个 Java UDF 时,它每次返回的值都相同。
在Spark中注册UDF spark.udf.registerJavaFunction("getGeneratedRand","com.test.RandNumGenerator",StringType())
用于随机数生成的UDF代码
import java.util.Calendar;
import java.util.Random;
import org.apache.spark.api.java.*;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.*;
import org.apache.spark.sql.api.java.UDF0;
import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;
import org.apache.commons.lang3.RandomStringUtils;
/**
 * Zero-argument Spark SQL UDF that builds an identifier of the form
 * {@code NEW_<12 random characters><ss><SSS>}, where the random characters
 * are drawn from {@link #characters} and {@code ss}/{@code SSS} are the
 * zero-padded second and millisecond of the current wall-clock time.
 *
 * <p>The random source is a single {@link Random} shared by every instance
 * in the JVM and created lazily on first use.
 *
 * <p>NOTE(review): Spark SQL appears to treat UDFs as deterministic unless
 * told otherwise, so a zero-argument call may be evaluated once and the
 * result reused for every row and column. If identical values are observed
 * when this is called from SQL, the function most likely has to be
 * registered as non-deterministic (e.g. via {@code asNondeterministic()})
 * or be given a per-row argument -- confirm against the Spark version in use.
 */
public class RandNumGenerator implements UDF0<String> {

    /** Alphabet used when {@link #characters} has not been overridden. */
    private static final char[] DEFAULT_CHARACTERS =
            {'1', '2', '3', '4', '5', '6', '7', '8', '9'};

    /** Number of random characters placed between the prefix and the time suffix. */
    private static final int RANDOM_PART_LENGTH = 12;

    /**
     * Alphabet the random part is drawn from. Initialised eagerly so that
     * concurrent first calls do not race on a lazy check-then-assign.
     */
    static char[] characters = DEFAULT_CHARACTERS.clone();

    /** Seed used for {@link #random}; {@code null} until the generator is created. */
    static volatile Long randomSeed = null;

    /**
     * JVM-wide random source. Declared {@code volatile} so the unsynchronised
     * first check in {@link #getRandom()} only ever sees a fully constructed
     * instance.
     */
    static volatile Random random = null;

    /**
     * Returns the shared generator, creating it on first use and seeding it
     * with the current time in milliseconds.
     *
     * @return the shared {@link Random}; never {@code null}
     */
    private Random getRandom() {
        Random local = random;
        if (local == null) {
            synchronized (RandNumGenerator.class) {
                local = random;
                if (local == null) {
                    long seed = System.currentTimeMillis();
                    randomSeed = seed;
                    local = new Random(seed);
                    random = local;
                }
            }
        }
        return local;
    }

    /**
     * Generates one identifier.
     *
     * @return {@code "NEW_"} followed by 12 random characters, the two-digit
     *         second and the three-digit millisecond of the current time
     * @throws Exception declared by the UDF interface; nothing is thrown
     *         explicitly by this method
     */
    @Override
    public String call() throws Exception {
        // Work on a local reference so a concurrent reassignment of the
        // static field cannot change the alphabet in the middle of a call.
        char[] alphabet = characters;
        if (alphabet == null) {
            // The field is package-visible and may have been reset to null.
            alphabet = DEFAULT_CHARACTERS.clone();
            characters = alphabet;
        }

        String randomPart = RandomStringUtils.random(
                RANDOM_PART_LENGTH, 0, 0, false, true, alphabet, getRandom());

        Calendar now = Calendar.getInstance();
        // Locale.ROOT keeps the digits ASCII regardless of the JVM's default locale.
        String timeSuffix = String.format(
                java.util.Locale.ROOT,
                "%02d%03d",
                now.get(Calendar.SECOND),
                now.get(Calendar.MILLISECOND));

        return "NEW_" + randomPart + timeSuffix;
    }

    /**
     * Manual smoke test: generates two identifiers and prints the second one.
     *
     * @param args ignored
     * @throws Exception propagated from {@link #call()}
     */
    public static void main(String[] args) throws Exception {
        RandNumGenerator obj = new RandNumGenerator();
        // The first result is intentionally discarded; only the second is printed.
        obj.call();
        String res = obj.call();
        System.out.print(res);
    }
}
在 Spark SQL 中调用该 UDF
spark.sql("select getGeneratedRand(),getGeneratedRand() from db.test_tbl").show(20,False)
结果:
+---------------------------------+---------------------------------+
|UDF:getGeneratedRand() |UDF:getGeneratedRand()|
+---------------------------------+---------------------------------+
|NEW_26481847455148826 |NEW_26481847455148826 |
|NEW_26481847455148826 |NEW_26481847455148826 |
|NEW_26481847455148826 |NEW_26481847455148826 |
|NEW_26481847455148826 |NEW_26481847455148826 |
|NEW_26481847455148826 |NEW_26481847455148826 |
|NEW_26481847455148826 |NEW_26481847455148826 |
|NEW_26481847455148826 |NEW_26481847455148826 |