JsonProtocol NoClassDefFoundError

Date: 2019-06-24 07:20:31

Tags: json scala apache-spark protocols

I save a Dataset into an Elasticsearch index once a day (scheduled with Oozie), but sometimes the job fails immediately with this error: java.lang.NoClassDefFoundError: Could not initialize class org.apache.spark.util.JsonProtocol. I don't understand why this error occurs.
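As far as I understand, "Could not initialize class" means the static initializer of JsonProtocol already threw on an earlier load attempt, and that class (used by Spark to serialize events) relies on json4s, which in turn sits on Jackson. So my guess is a classpath/version conflict in the environment (e.g. something pulled in by the Oozie sharelib) rather than the job code itself. To check that, I run a rough probe like the one below; this is only a diagnostic sketch (the ClasspathProbe name and the list of probed classes are mine, not part of the job):

// Diagnostic sketch only: print which jar provides each class that
// org.apache.spark.util.JsonProtocol depends on, to spot version conflicts.
object ClasspathProbe {

  private def jarOf(className: String): String =
    try {
      val clazz = Class.forName(className) // also triggers static initialization
      Option(clazz.getProtectionDomain.getCodeSource)
        .map(_.getLocation.toString)
        .getOrElse("<no code source: likely bootstrap classpath>")
    } catch {
      case t: Throwable => s"not loadable: ${t.getClass.getName}: ${t.getMessage}"
    }

  def main(args: Array[String]): Unit =
    Seq(
      "org.apache.spark.util.JsonProtocol",
      "org.json4s.jackson.JsonMethods$",
      "com.fasterxml.jackson.databind.ObjectMapper"
    ).foreach(name => println(s"$name -> ${jarOf(name)}"))
}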

Code:

private def readSource1()(implicit spark: SparkSession): DataFrame = {
  import spark.implicits._

  val sourceName = "dictionary.source1"
  val plantsPath: String = config.getString("sources." + sourceName + ".path")
  spark.read
    .option("delimiter", ";")
    .option("header", "true")
    .csv(plantsPath)
    .select('id as "sourceId", 'assembly_site_id)
}

private def readSource2()(implicit spark: SparkSession): DataFrame = {
  import spark.implicits._

  val source2: SourceIO = SourceManager(config)("source2")
  (startDate, endDate) match {
    case (Some(sd), Some(ed)) ⇒ source2.loadDf()
      .where('assemblyEndDate.between(Date.valueOf(sd), Date.valueOf(ed)) ||
      'tctDate.between(Date.valueOf(sd), Date.valueOf(ed)))
    case _ ⇒ source2.loadDf()
  }
}

def saveSourceToEs(implicit sparkSession: SparkSession): Unit = {
  val source1: DataFrame = readSource1()
  val source2: DataFrame = readSource2()
  val source: Dataset[Source] = buildSource(this.getSource(source1, source2))
  source.saveToEs(s"source_${createDateString()}/_doc")
}


object SourceIndexer extends SparkApp with Configurable with Logging {

  val config: Config = ConfigFactory.load()

  def apply(
    sourceID:  Option[String]    = None,
    startDate: Option[LocalDate] = None,
    endDate:   Option[LocalDate] = None
  ): SourceIndexer = {
    new SourceIndexer(config, sourceID, startDate, endDate)
  }

  def main(args: Array[String]): Unit = {
    try {
      val bootConfig = BootConfig.parseSourceIndexer(args)
      this.apply(bootConfig.sourceID, bootConfig.startDate, bootConfig.endDate)
        .saveSourceToEs(spark)
    } finally {
      spark.sparkContext.stop()
    }
  }

}
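For completeness, saveToEs comes from the elasticsearch-hadoop (elasticsearch-spark) connector. Roughly how the session and the connector are wired up on my side; the host, port and app name below are placeholders, not my real settings, and in the actual job the session is provided by SparkApp:

import org.apache.spark.sql.SparkSession
import org.elasticsearch.spark.sql._ // brings the saveToEs implicit on Dataset/DataFrame into scope

// Placeholder configuration for illustration only.
val spark: SparkSession = SparkSession.builder()
  .appName("SourceIndexer")
  .config("es.nodes", "elasticsearch-host")
  .config("es.port", "9200")
  .config("es.index.auto.create", "true")
  .getOrCreate()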

Thanks for your help.

0 Answers:

No answers yet.