我在代码中使用了Spark UDF,但其中一个UDF的注册会间歇性地失败,并出现以下错误:
scala.reflect.internal.Symbols$CyclicReference: illegal cyclic reference involving package <root>
at scala.reflect.internal.Symbols$TypeSymbol.tpe(Symbols.scala:2768)
at scala.reflect.internal.Mirrors$Roots$RootPackage$.<init>(Mirrors.scala:268)
at scala.reflect.internal.Mirrors$Roots.RootPackage$lzycompute(Mirrors.scala:267)
at scala.reflect.internal.Mirrors$Roots.RootPackage(Mirrors.scala:267)
at scala.reflect.runtime.JavaMirrors$JavaMirror.scala$reflect$runtime$JavaMirrors$$makeScalaPackage(JavaMirrors.scala:902)
at scala.reflect.runtime.JavaMirrors$class.missingHook(JavaMirrors.scala:1299)
at scala.reflect.runtime.JavaUniverse.missingHook(JavaUniverse.scala:12)
at scala.reflect.internal.Mirrors$RootsBase.universeMissingHook(Mirrors.scala:77)
at scala.reflect.internal.Mirrors$RootsBase.missingHook(Mirrors.scala:79)
at scala.reflect.internal.Mirrors$RootsBase.getModuleOrClass(Mirrors.scala:48)
at scala.reflect.internal.Mirrors$RootsBase.getModuleOrClass(Mirrors.scala:40)
at scala.reflect.internal.Mirrors$RootsBase.getModuleOrClass(Mirrors.scala:40)
at scala.reflect.internal.Mirrors$RootsBase.getModuleOrClass(Mirrors.scala:40)
at scala.reflect.internal.Mirrors$RootsBase.getModuleOrClass(Mirrors.scala:40)
at scala.reflect.internal.Mirrors$RootsBase.getModuleOrClass(Mirrors.scala:40)
at scala.reflect.internal.Mirrors$RootsBase.getModuleOrClass(Mirrors.scala:61)
at scala.reflect.internal.Mirrors$RootsBase.staticModuleOrClass(Mirrors.scala:72)
at scala.reflect.internal.Mirrors$RootsBase.staticClass(Mirrors.scala:119)
at scala.reflect.internal.Mirrors$RootsBase.staticClass(Mirrors.scala:21)
at org.apache.spark.sql.catalyst.ScalaReflection$$typecreator37$1.apply(ScalaReflection.scala:669)
at scala.reflect.api.TypeTags$WeakTypeTagImpl.tpe$lzycompute(TypeTags.scala:231)
at scala.reflect.api.TypeTags$WeakTypeTagImpl.tpe(TypeTags.scala:231)
at org.apache.spark.sql.catalyst.ScalaReflection$class.localTypeOf(ScalaReflection.scala:654)
at org.apache.spark.sql.catalyst.ScalaReflection$.localTypeOf(ScalaReflection.scala:30)
at org.apache.spark.sql.catalyst.ScalaReflection$class.schemaFor(ScalaReflection.scala:669)
at org.apache.spark.sql.catalyst.ScalaReflection$.schemaFor(ScalaReflection.scala:30)
at org.apache.spark.sql.catalyst.ScalaReflection$class.schemaFor(ScalaReflection.scala:642)
at org.apache.spark.sql.catalyst.ScalaReflection$.schemaFor(ScalaReflection.scala:30)
at org.apache.spark.sql.UDFRegistration.register(UDFRegistration.scala:132)
at com.akamai.csi.reputation.heuristics.commons.HeuristicPhase$class.loadAndRegisterCpcodeToAccountUDF(HeuristicPhase.scala:119)
at com.akamai.csi.reputation.heuristics.commons.HeuristicPhase$class.initializeHeuristic(HeuristicPhase.scala:113)
at com.akamai.csi.reputation.heuristics.commons.dlr1hr.Dlr1hrBasePhase.initializeHeuristic(Dlr1hrBasePhase.scala:25)
at com.akamai.csi.reputation.heuristics.commons.HeuristicPhase$class.execute(HeuristicPhase.scala:50)
at com.akamai.csi.reputation.heuristics.commons.dlr1hr.Dlr1hrBasePhase.execute(Dlr1hrBasePhase.scala:16)
at com.akamai.csi.application.core.flow.FlowExecutor.executePhaseWithRetry(FlowExecutor.java:149)
at com.akamai.csi.application.core.flow.FlowExecutor.execute(FlowExecutor.java:49)
at com.akamai.csi.application.core.flow.TransactionEmbeddedFlow.executeFlow(TransactionEmbeddedFlow.java:68)
at com.akamai.csi.application.core.impl.DefaultCoreApplicationManager.executeFlow(DefaultCoreApplicationManager.java:369)
at com.akamai.csi.application.core.impl.DefaultCoreApplicationManager.lambda$submit$6(DefaultCoreApplicationManager.java:358)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at com.akamai.csi.application.core.impl.ExecutorWithTimeout.lambda$null$0(ExecutorWithTimeout.java:32)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
有人知道造成这种情况的原因是什么吗? 正如我所提到的,这个问题并不是每次运行都会出现,这使得问题的根源更加难以确定。
编辑:添加UDF定义类:
/**
 * Wraps a broadcast cpcode-to-account lookup table as a `Long => String`
 * function value (see [[udf]]).
 *
 * @param cpcodeToAccount     broadcast map consulted on every lookup
 * @param cpCodeWithNoAccount accumulator incremented by one for each lookup
 *                            whose cpcode is not a key of the map
 */
class CpcodeToAccountArrayUDF(cpcodeToAccount: Broadcast[Map[Long, String]], cpCodeWithNoAccount: Accumulator[Long]) extends Serializable with Logging {

  /** Returns [[cpcodeToAccountArray]] lifted to a function value. */
  def udf(): Long => String = cpcodeToAccountArray _

  /**
   * Looks up `cpcode` in the broadcast map.
   *
   * @param cpcode key to look up
   * @return the mapped value when the key is present; otherwise the empty
   *         string, after adding 1 to `cpCodeWithNoAccount`
   */
  def cpcodeToAccountArray(cpcode: Long): String =
    cpcodeToAccount.value.get(cpcode) match {
      case Some(account) => account
      case None =>
        // NOTE(review): presumably this runs on executors, where accumulator
        // updates may be re-applied if a task is retried -- confirm the count
        // is only used as an approximate metric.
        cpCodeWithNoAccount += 1
        ""
    }
}