我正在考虑在 Spark 中实现一个自定义表达式(Expression)来完成转换(目前使用的是 UDF),并希望该表达式能够使用由驱动程序(driver)生成的大型 TreeMap(用于范围查找)。
可以在表达式内部使用广播变量吗?这样做有什么缺点?关于如何在表达式中使用广播变量,还有更好的做法吗?
// Broadcast created elsewhere in the application; the TreeMap constructor arguments are elided here.
val broadcastedVariable = sparkSession.sparkContext.broadcast(new TreeMap(...))
/**
 * Leaf expression whose evaluation result is read from `broadcastedVariable`.
 *
 * It declares itself foldable, non-nullable, and of type `DateType`, and mixes in
 * `CodegenFallback` rather than supplying its own generated code.
 *
 * NOTE(review): `foldable = true` presumably allows the optimizer to evaluate this once and
 * inline the result — confirm that is intended for a value backed by a broadcast variable.
 * NOTE(review): `prettyName` is "current_date", which looks copied from Spark's built-in
 * `CurrentDate` expression — confirm the intended display name.
 *
 * @param timeZoneId optional time-zone id; replaced through `withTimeZone`
 */
case class GenerateMyItem(timeZoneId: Option[String] = None)
  extends LeafExpression
  with TimeZoneAwareExpression
  with CodegenFallback {

  /** Zero-argument constructor; delegates to the primary constructor with no time zone. */
  def this() = this(None)

  override def prettyName: String = "current_date"

  override def dataType: DataType = DateType

  override def nullable: Boolean = false

  override def foldable: Boolean = true

  /** Returns a copy of this expression carrying the given time-zone id (`null` becomes `None`). */
  override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = {
    val zone = Option(timeZoneId)
    copy(timeZoneId = zone)
  }

  /**
   * Placeholder evaluation: ignores `input` and returns the result of applying the
   * broadcast value to `0`.
   */
  override def eval(input: InternalRow): Any =
    broadcastedVariable.value(0)
}
谢谢!