Spark-Atlas-Connector 在启动期间抛出 NullPointerException

时间:2019-08-12 14:09:55

标签: apache-spark hortonworks-data-platform apache-atlas

我正在尝试启动一个作业,用来测试 Spark 与 Atlas 的集成。

这是一个简单的作业,它从一个主题读取数据并写入另一个主题。


  // Register the Spark-Atlas-Connector tracker classes under Spark's listener
  // configuration keys (core, SQL query execution, and streaming query).
  val sparkConf = new SparkConf()
      .setAppName("atlas-test")
      .setMaster("local[2]")
      .set("spark.extraListeners", "com.hortonworks.spark.atlas.SparkAtlasEventTracker")
      .set("spark.sql.queryExecutionListeners", "com.hortonworks.spark.atlas.SparkAtlasEventTracker")
      .set("spark.sql.streaming.streamingQueryListeners", "com.hortonworks.spark.atlas.SparkAtlasStreamingQueryEventTracker")


    val spark = SparkSession.builder()
      .config(sparkConf)
      .enableHiveSupport()
      .getOrCreate()

    // Needed for the .as[(String, String)] encoder below.
    import spark.implicits._


    // Batch read of the source topic. BROKER_SERVERS and clusterName are
    // defined outside this snippet.
    // The Kafka source option is "startingOffsets" (plural); the singular
    // spelling is not a recognized option and is silently ignored.
    val df = spark.read.format("kafka")
      .option("kafka.bootstrap.servers", BROKER_SERVERS)
      .option("subscribe", "foobar2")
      .option("startingOffsets", "earliest")
      .option("kafka.atlas.cluster.name", clusterName)
      .load()

    println("---------------------------------------------")

    df.printSchema()
    // String view of key/value, used only for the console preview; the raw
    // `df` (not `dfs`) is what gets written out below.
    val dfs = df.selectExpr("CAST(key as STRING)","CAST(value AS STRING)").as[(String, String)]

    dfs.show()
    println("---------------------------------------------")

    // Batch write of the records to the output topic.
    df.write
      .format("kafka")
      .option("kafka.bootstrap.servers", BROKER_SERVERS)
      .option("topic", "foobar-out")
      .option("kafka.atlas.cluster.name", clusterName)
      .save()

一切看起来都很清楚。于是我尝试在我的 IDE(IntelliJ)中运行该作业,但几乎每次都会遇到以下异常:

19/08/12 17:00:08 WARN SparkExecutionPlanProcessor: Caught exception during parsing event
java.lang.NullPointerException
    at org.apache.spark.sql.internal.SQLConf$$anonfun$14.apply(SQLConf.scala:133)
    at org.apache.spark.sql.internal.SQLConf$$anonfun$14.apply(SQLConf.scala:133)
    at scala.Option.map(Option.scala:146)
    at org.apache.spark.sql.internal.SQLConf$.get(SQLConf.scala:133)
    at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.simpleString(SaveIntoDataSourceCommand.scala:52)
    at org.apache.spark.sql.catalyst.plans.QueryPlan.verboseString(QueryPlan.scala:177)
    at org.apache.spark.sql.catalyst.trees.TreeNode.generateTreeString(TreeNode.scala:548)
    at org.apache.spark.sql.catalyst.trees.TreeNode.treeString(TreeNode.scala:472)
    at org.apache.spark.sql.execution.QueryExecution$$anonfun$4.apply(QueryExecution.scala:197)
    at org.apache.spark.sql.execution.QueryExecution$$anonfun$4.apply(QueryExecution.scala:197)
    at org.apache.spark.sql.execution.QueryExecution.stringOrError(QueryExecution.scala:99)
    at org.apache.spark.sql.execution.QueryExecution.toString(QueryExecution.scala:197)
    at com.hortonworks.spark.atlas.sql.CommandsHarvester$.com$hortonworks$spark$atlas$sql$CommandsHarvester$$getPlanInfo(CommandsHarvester.scala:214)
    at com.hortonworks.spark.atlas.sql.CommandsHarvester$.com$hortonworks$spark$atlas$sql$CommandsHarvester$$makeProcessEntities(CommandsHarvester.scala:222)
    at com.hortonworks.spark.atlas.sql.CommandsHarvester$SaveIntoDataSourceHarvester$.harvest(CommandsHarvester.scala:183)
    at com.hortonworks.spark.atlas.sql.SparkExecutionPlanProcessor$$anonfun$2.apply(SparkExecutionPlanProcessor.scala:108)
    at com.hortonworks.spark.atlas.sql.SparkExecutionPlanProcessor$$anonfun$2.apply(SparkExecutionPlanProcessor.scala:89)
    at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
    at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
    at scala.collection.AbstractTraversable.flatMap(Traversable.scala:104)
    at com.hortonworks.spark.atlas.sql.SparkExecutionPlanProcessor.process(SparkExecutionPlanProcessor.scala:89)
    at com.hortonworks.spark.atlas.sql.SparkExecutionPlanProcessor.process(SparkExecutionPlanProcessor.scala:63)
    at com.hortonworks.spark.atlas.AbstractEventProcessor$$anonfun$eventProcess$1.apply(AbstractEventProcessor.scala:72)
    at com.hortonworks.spark.atlas.AbstractEventProcessor$$anonfun$eventProcess$1.apply(AbstractEventProcessor.scala:71)
    at scala.Option.foreach(Option.scala:257)
    at com.hortonworks.spark.atlas.AbstractEventProcessor.eventProcess(AbstractEventProcessor.scala:71)
    at com.hortonworks.spark.atlas.AbstractEventProcessor$$anon$1.run(AbstractEventProcessor.scala:38)

我正在将Spark 2.4.0与Scala 2.11一起使用

我对结果也有些疑惑。老实说,这个作业完成之后,我应该能在我的 Atlas(本地计算机)中看到相应的内容吗?因为有时作业运行成功,但 Atlas 中什么也没出现。

0 个答案:

没有答案