Check whether a table exists with Spark JDBC

Date: 2019-04-23 15:00:31

Tags: sql-server scala apache-spark spark-jdbc

I am using Spark JDBC to read some data from a Microsoft SQL Server into a DataFrame. When the table does not exist (for example, because it was accidentally dropped), I get an exception: com.microsoft.sqlserver.jdbc.SQLServerException: Invalid object name 'TestAllData'.

I would like some mechanism that first checks whether the table exists and only then reads the data. Is there a way to do this with Spark JDBC? I tried the IF EXISTS construct from MS SQL Server, but it does not work for Spark queries.
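For example, an attempt along these lines (a hypothetical reconstruction of what I tried) fails, because Spark wraps whatever is passed as dbtable in a SELECT ... FROM (...) subquery, and an IF EXISTS batch is T-SQL control flow rather than a valid subquery:

// Hypothetical attempt: NOT valid as a Spark JDBC "dbtable" value, since
// Spark emits SELECT ... FROM (<this string>) and IF EXISTS cannot appear
// inside a FROM clause.
val badQuery =
  """(IF EXISTS (SELECT 1 FROM INFORMATION_SCHEMA.TABLES
    |            WHERE TABLE_NAME = 'TestData')
    |  SELECT * FROM TestData) as subq""".stripMargin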

The code I currently use to read the data looks like this:

def getDataQuery() = {
  "(select * from TestData) as subq"
}


def jdbcOptions(dataQuery: String, partitionColumn: String, lowerBound: String, upperBound: String, numPartitions: String) = Map[String,String](
    "driver" -> config.getString("sqlserver.db.driver"),
    "url" -> config.getString("sqlserver.db.url"),
    "user" -> config.getString("sqlserver.db.user"),
    "password" -> config.getString("sqlserver.db.password"),
    "customSchema" -> config.getString("sqlserver.db.custom_schema"),
    "dbtable" -> dataQuery,
    "partitionColumn" -> partitionColumn,
    "lowerBound" -> lowerBound,
    "upperBound" -> upperBound,
    "numPartitions" -> numPartitions
  )

val dataDF = sparkSession
  .read
  .format("jdbc")
  // example partitioning values; substitute ones that fit your table
  .options(jdbcOptions(getDataQuery(), "id", "1", "1000", "4"))
  .load()

2 Answers:

Answer 0 (score: 1)

You can first run a query that lists the tables, then guard the read with an if:

def tableExist() = {
  // Query SQL Server's catalog directly; "show tables in default" is
  // Spark SQL syntax and is not valid SQL when pushed through JDBC.
  "(select name as tableName from sys.tables) as tables"
}

val existDF = sparkSession
  .read
  .format("jdbc")
  .option("driver", config.getString("sqlserver.db.driver"))
  .option("url", config.getString("sqlserver.db.url"))
  .option("user", config.getString("sqlserver.db.user"))
  .option("password", config.getString("sqlserver.db.password"))
  .option("dbtable", tableExist())
  .load()

val dataDF = if (existDF.select("tableName").collect().map(_(0)).contains("TestData"))
  sparkSession
    .read
    .format("jdbc")
    .options(jdbcOptions(getDataQuery(), "id", "1", "1000", "4"))
    .load()
else
  sparkSession.emptyDataFrame // table missing: fall back to an empty DataFrame
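
A lighter-weight variant of the same idea (a sketch, not part of the original answer) is to ask the JDBC driver directly through java.sql.DatabaseMetaData, so the metadata check does not need a Spark read; it reuses the connection settings from the question's config:

import java.sql.DriverManager

// Sketch: check existence through plain JDBC metadata before the Spark read.
def tableExists(name: String): Boolean = {
  val conn = DriverManager.getConnection(
    config.getString("sqlserver.db.url"),
    config.getString("sqlserver.db.user"),
    config.getString("sqlserver.db.password"))
  try {
    // getTables(catalog, schemaPattern, tableNamePattern, types)
    val rs = conn.getMetaData.getTables(null, null, name, Array("TABLE"))
    rs.next() // true if at least one matching table exists
  } finally conn.close()
}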

Answer 1 (score: 0)

The same concept, written by Pablo López Gallego for Postgres:

import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}

object JdbcLoader extends App {

  val finalUrl = s"jdbc:postgresql://localhost:5432/my_db?ApplicationName=test"
  val user = "user"
  val password = "pass"

  val sparkConf = new SparkConf()
  sparkConf.setMaster(s"local[2]")
  val spark = SparkSession.builder().config(sparkConf).getOrCreate()


  def loadTable(tableName:String ): DataFrame ={

    val opts: Map[String, String] = Map(
      "url" -> finalUrl,
      "user" -> user,
      "password" -> password,
      "dbtable" -> tableName
    )

    spark.read.
      format("jdbc").
      options(opts).
      load()
  }

  def checkIfTableExists(tableName: String): Boolean = {

    // Default to the "public" schema unless the name is schema-qualified.
    var schema = "public"
    var table = tableName
    if (tableName.contains(".")) {
      val schemaAndTable = tableName.split("\\.")
      schema = schemaAndTable.head
      table = schemaAndTable.last
    }

    // Query Postgres' information_schema for a matching base table.
    val tableExistQ = s"(SELECT table_name FROM information_schema.tables WHERE table_schema='${schema}'" +
      s" AND table_type='BASE TABLE' AND table_name = '${table}') as FOO"

    val df = loadTable(tableExistQ)
    df.count() > 0

  }


  println(checkIfTableExists("my_schema.users"))

}
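
A usage sketch (hypothetical table name), assuming the two helpers above are in scope, so the read only happens when the table is present:

// Hypothetical usage: load only if the table exists, else an empty DataFrame.
val users: DataFrame =
  if (checkIfTableExists("my_schema.users")) loadTable("my_schema.users")
  else spark.emptyDataFrame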