I'm learning Spark SQL and I'm confused about the relationship between Spark's SessionCatalog and the Hive MetaStore.
I learned that HiveSessionStateBuilder creates a new Analyzer backed by a HiveSessionCatalog.
Does this mean we can join a Hive table and an in-memory table (a temporary view) in a single Spark SQL statement?
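
To make the question concrete, below is a minimal sketch of the kind of query I have in mind; the sales Hive table and its columns are hypothetical and assumed to already exist in the metastore:

import org.apache.spark.sql.SparkSession

// Hive support is required so that the session state is built by
// HiveSessionStateBuilder and uses a HiveSessionCatalog
val spark = SparkSession.builder()
  .appName("hive-temp-view-join")
  .enableHiveSupport()
  .getOrCreate()
import spark.implicits._

// an in-memory DataFrame registered as a session-scoped temporary view
Seq((1, "US"), (2, "DE")).toDF("id", "country")
  .createOrReplaceTempView("regions")

// "sales" is the hypothetical Hive metastore table
spark.sql(
  """SELECT s.id, s.amount, r.country
    |FROM sales s
    |JOIN regions r ON s.id = r.id""".stripMargin).show()

Here is the relevant source in HiveSessionStateBuilder that I'm looking at: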
/**
 * Create a [[HiveSessionCatalog]].
 */
override protected lazy val catalog: HiveSessionCatalog = {
  val catalog = new HiveSessionCatalog(
    externalCatalog,
    session.sharedState.globalTempViewManager,
    new HiveMetastoreCatalog(session),
    functionRegistry,
    conf,
    SessionState.newHadoopConf(session.sparkContext.hadoopConfiguration, conf),
    sqlParser,
    resourceLoader)
  parentState.foreach(_.catalog.copyStateTo(catalog))
  catalog
}

/**
 * A logical query plan `Analyzer` with rules specific to Hive.
 */
override protected def analyzer: Analyzer = new Analyzer(catalog, conf) {
  override val extendedResolutionRules: Seq[Rule[LogicalPlan]] =
    new ResolveHiveSerdeTable(session) +:
    new FindDataSourceTable(session) +:
    new ResolveSQLOnFile(session) +:
    customResolutionRules

  override val postHocResolutionRules: Seq[Rule[LogicalPlan]] =
    new DetermineTableStats(session) +:
    RelationConversions(conf, catalog) +:
    PreprocessTableCreation(session) +:
    PreprocessTableInsertion(conf) +:
    DataSourceAnalysis(conf) +:
    HiveAnalysis +:
    customPostHocResolutionRules

  override val extendedCheckRules: Seq[LogicalPlan => Unit] =
    PreWriteCheck +:
    customCheckRules
}
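
From the constructor call above, HiveSessionCatalog receives both externalCatalog (backed by the Hive metastore) and session.sharedState.globalTempViewManager, so metastore tables and temporary views seem to share a single lookup path. Assuming the hypothetical sales table and regions view from the sketch above, one way to check would be the public Catalog API, which lists both kinds in one result:

// lists metastore tables in the current database together with
// session-scoped temp views; the isTemporary column distinguishes them
spark.catalog.listTables().show()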