我正在尝试将数据加载到 Hive 表中,但在加载过程中收到错误“java.lang.StringIndexOutOfBoundsException: String index out of range: 0”。我不知道哪里做错了,这个错误已经耗费了我很多时间。任何帮助都将不胜感激。
文件:
test.txt
H,DatatypeCode,Description
K,03099,Metric Expanded mple Size
K,05307,Elapsed Day Factor
/**
 * Reads `test.txt` through the CSV data source and appends its rows to a Hive
 * table over JDBC.
 *
 * The following names are used here but are not defined in this snippet and
 * must be provided by the surrounding code: `sparkSession`, `customSchema`,
 * `url`, `console`, `INFO`, `Message`, `fileProcessing`, `sourceFileName`
 * and `dirPath`.
 */
object testObject {

  def main(args: Array[String]): Unit = {
    val sourceTableName = "sourceHiveTable"
    val tmpTable = "tmpHiveTable"
    // Single place where the JDBC driver class is named; it is passed to
    // loadDFToDB instead of being hard-coded a second time inside it.
    val driverName = "com.cloudera.hive.jdbc41.HS2Driver"
    // `xxxx` is the placeholder left by the original author for the credentials.
    val userNam = xxxx
    val passwrd = xxxx

    // The first line of the file is treated as a header row and skipped.
    val readDF = sparkSession.read
      .format("com.databricks.spark.csv")
      .option("delimiter", ",")
      .option("header", "true")
      .schema(customSchema)
      .load("test.txt")

    /**
     * Registers `dataFrame` as the temporary view `tmpTable` and appends the
     * view's rows to `sourceTableName` through JDBC.
     *
     * @param dataFrame       rows to store
     * @param sourceTableName target table of the JDBC write
     * @param tmpTable        name of the temporary view created from `dataFrame`
     * @param driverName      fully qualified JDBC driver class name
     * @param userNam         JDBC user name
     * @param passwrd         JDBC password
     */
    def loadDFToDB(
        dataFrame: DataFrame,
        sourceTableName: String,
        tmpTable: String,
        driverName: String,
        userNam: String,
        passwrd: String
    ): Unit = {
      try {
        dataFrame.createOrReplaceTempView(tmpTable)
        console.log(INFO, Map(
          Message -> "Storing dataframe into Temporary Table"
        ))
      } catch {
        case e: Exception =>
          console.captureStackTrace(e, Map(
            Message -> "While converting dataframe to table process got failed,please check the stackTrace for more details"))
          fileProcessing.moveFile(sourceFileName, dirPath)
          sys.exit(1)
      }

      // NOTE(review): the reported StringIndexOutOfBoundsException is raised by
      // the driver's parseSubName while it checks the URL (acceptsURL), which
      // suggests `url` is empty or malformed. Confirm it follows the driver's
      // documented form (presumably jdbc:hive2://<host>:<port>/<db>).
      require(
        Option(url).exists(_.trim.nonEmpty),
        "JDBC url must not be null or blank"
      )

      val connectionProperties = new Properties()
      connectionProperties.put("user", userNam)
      connectionProperties.put("password", passwrd)

      sparkSession.sqlContext.sql("select * from " + tmpTable)
        .write
        .mode(SaveMode.Append) // append to the existing table instead of replacing it
        .option("driver", driverName)
        .option("header", "false")
        .jdbc(url, sourceTableName, connectionProperties)
    }

    // The original call omitted driverName, so it did not match the
    // six-parameter signature above.
    loadDFToDB(readDF, sourceTableName, tmpTable, driverName, userNam, passwrd)
  }
}
错误:
java.lang.StringIndexOutOfBoundsException: String index out of range: 0
at java.lang.String.charAt(String.java:658)
at com.cloudera.hiveserver2.hivecommon.core.CoreUtils.parseSubName(Unknown Source)
at com.cloudera.hive.jdbc41.HS2Driver.parseSubName(Unknown Source)
at com.cloudera.hiveserver2.jdbc.common.BaseConnectionFactory.acceptsURL(Unknown Source)
at com.cloudera.hiveserver2.jdbc.common.AbstractDriver.connect(Unknown Source)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory$1.apply(JdbcUtils.scala:59)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory$1.apply(JdbcUtils.scala:50)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:59)
at org.apache.spark.sql.execution.datasources.DataSource.write(DataSource.scala:426)
答案 0 :(得分:0)
我的猜测是 Hive 的 JDBC URL 有问题,依据是堆栈跟踪中的这一行:
at com.cloudera.hiveserver2.jdbc.common.BaseConnectionFactory.acceptsURL(Unknown Source)