Is it possible to join a Hive table with an in-memory table in Spark SQL?

Asked: 2017-11-27 06:15:35

Tags: apache-spark apache-spark-sql

I am learning Spark SQL and am confused about the relationship between Spark's SessionCatalog and the Hive metastore.

I have learned that HiveSessionStateBuilder creates a new Analyzer that uses a HiveSessionCatalog.

Does this mean we can join a Hive table with an in-memory table in a single Spark SQL query? (A sketch of the kind of query I have in mind follows the code excerpt below.)

  /**
   * Create a [[HiveSessionCatalog]].
   */
  override protected lazy val catalog: HiveSessionCatalog = {
    val catalog = new HiveSessionCatalog(
      externalCatalog,
      session.sharedState.globalTempViewManager,
      new HiveMetastoreCatalog(session),
      functionRegistry,
      conf,
      SessionState.newHadoopConf(session.sparkContext.hadoopConfiguration, conf),
      sqlParser,
      resourceLoader)
    parentState.foreach(_.catalog.copyStateTo(catalog))
    catalog
  }

  /**
   * A logical query plan `Analyzer` with rules specific to Hive.
   */
  override protected def analyzer: Analyzer = new Analyzer(catalog, conf) {
    override val extendedResolutionRules: Seq[Rule[LogicalPlan]] =
      new ResolveHiveSerdeTable(session) +:
      new FindDataSourceTable(session) +:
      new ResolveSQLOnFile(session) +:
      customResolutionRules

    override val postHocResolutionRules: Seq[Rule[LogicalPlan]] =
      new DetermineTableStats(session) +:
      RelationConversions(conf, catalog) +:
      PreprocessTableCreation(session) +:
      PreprocessTableInsertion(conf) +:
      DataSourceAnalysis(conf) +:
      HiveAnalysis +:
      customPostHocResolutionRules

    override val extendedCheckRules: Seq[LogicalPlan => Unit] =
      PreWriteCheck +:
      customCheckRules
  }
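
For concreteness, this is the kind of mixed join I have in mind. It is only a sketch: the Hive table hive_db.orders and the temporary view recent_customers are made-up names, and I am assuming that enableHiveSupport() is what backs the SessionCatalog with the Hive metastore.

  import org.apache.spark.sql.SparkSession

  val spark = SparkSession.builder()
    .appName("hive-and-in-memory-join")
    .enableHiveSupport() // back the catalog with the Hive metastore
    .getOrCreate()

  import spark.implicits._

  // An "in-memory table": a local Dataset registered as a temporary view.
  // Temp views live only in the SessionCatalog, not in the Hive metastore.
  val recentCustomers = Seq((1L, "alice"), (2L, "bob")).toDF("customer_id", "name")
  recentCustomers.createOrReplaceTempView("recent_customers")

  // One SQL statement referencing both a Hive table (the hypothetical
  // hive_db.orders, resolved through the metastore) and the temp view
  // (resolved from the session catalog).
  val joined = spark.sql(
    """SELECT o.order_id, c.name
      |FROM hive_db.orders o
      |JOIN recent_customers c ON o.customer_id = c.customer_id""".stripMargin)

  joined.show()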

0 Answers:

No answers yet.