我为LocalDateTime写了一个UDT,因为Spark SQL本身不支持这种类型。
>>> genexpr = (i >= 0 for i in a)
>>> list(genexpr)
[True]
>>> any(i >= 0 for i in a)
True
然后,我编写一个测试用例进行测试:
/**
 * Spark SQL user-defined type that stores a `java.time.LocalDateTime` in a
 * column whose underlying SQL type is `TimestampType`.
 *
 * Catalyst represents `TimestampType` values internally as a `Long` holding
 * the number of microseconds since the Unix epoch, so `serialize` must emit
 * microseconds (not milliseconds) and `deserialize` must decode the same
 * representation. The local date-time is interpreted in the JVM's default
 * time zone in both directions.
 *
 * NOTE(review): local times that fall into a daylight-saving gap of the
 * default zone are adjusted by `atZone` and therefore may not round-trip
 * exactly — confirm whether that matters for callers.
 */
class LocalDateTimeUDT extends UserDefinedType[LocalDateTime] {

  /** Underlying Catalyst type used to store the value. */
  override def sqlType: DataType = TimestampType

  /**
   * Converts a local date-time to microseconds since the Unix epoch.
   *
   * @param obj the local date-time, interpreted in the system default zone
   * @return a `Long` number of microseconds since 1970-01-01T00:00:00Z
   * @throws ArithmeticException if the instant does not fit in a `Long` of microseconds
   */
  override def serialize(obj: LocalDateTime): Any = {
    val instant = obj.atZone(ZoneId.systemDefault()).toInstant
    // Exact arithmetic so an out-of-range value fails loudly instead of wrapping.
    Math.addExact(
      Math.multiplyExact(instant.getEpochSecond, 1000000L),
      instant.getNano / 1000L
    )
  }

  /**
   * Converts a stored microsecond count back to a local date-time.
   *
   * @param datum the stored value; expected to be a `Long` of microseconds since the epoch
   * @return the corresponding local date-time in the system default zone
   * @throws IllegalArgumentException if `datum` is not a `Long`
   */
  override def deserialize(datum: Any): LocalDateTime = datum match {
    case micros: Long =>
      // floorDiv/floorMod keep the nanosecond adjustment non-negative for pre-1970 values.
      val instant = java.time.Instant.ofEpochSecond(
        Math.floorDiv(micros, 1000000L),
        Math.floorMod(micros, 1000000L) * 1000L
      )
      LocalDateTime.ofInstant(instant, ZoneId.systemDefault())
    case other =>
      throw new IllegalArgumentException(
        s"LocalDateTimeUDT cannot deserialize value: $other"
      )
  }

  /** Runtime class of the user-facing type handled by this UDT. */
  override def userClass: Class[LocalDateTime] = classOf[LocalDateTime]
}
}
但是,引发了以下异常:
// Builds a single-column DataFrame of LocalDateTime values typed with
// LocalDateTimeUDT, prints it, and then runs a SQL filter against it.
test("SparkSQLTest") {
// Session configured with master "local" and application name "SparkTest".
val spark = SparkSession.builder().master("local").appName("SparkTest").getOrCreate()
import spark.implicits._
// Associate LocalDateTime with LocalDateTimeUDT by class name.
UDTRegistration.register(classOf[LocalDateTime].getName, classOf[LocalDateTimeUDT].getName)
// Two sample values, each wrapped in a one-field Row.
val seq = Seq(LocalDateTime.now(), LocalDateTime.now())
val rdd = spark.sparkContext.parallelize(seq).map(d => Row.fromSeq(Seq(d)))
// Schema with a single column "udt" whose type is the UDT itself.
val schema = new StructType().add("udt", new LocalDateTimeUDT())
val df = spark.createDataFrame(rdd, schema)
df.printSchema()
df.show(truncate = false)
// Expose the DataFrame to SQL under the view name "t".
df.createOrReplaceTempView("t")
// The query below is reported to fail with the following error:
// cannot resolve '(t.`udt` > current_timestamp())' due to data type mismatch:
// differing types in '(t.`udt` > current_timestamp())' (timestamp and timestamp).; line 1 pos 22;
// NOTE(review): both sides print as "timestamp", yet the error says they differ;
// presumably the UDT column is not treated as a plain TimestampType — confirm.
spark.sql("select * from t where udt > current_timestamp()").show(truncate = false)
我想知道如何让我的SQL查询(带有过滤条件)正常工作,谢谢。