I am trying to push data into an existing Hive table. I have already created the ORC table in Hive, but I cannot insert data into it. The code below works fine if I copy-paste it into the spark-shell console, but it fails when run through spark-submit.
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

object TestCode {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("first example").setMaster("local")
    val sc = new SparkContext(conf)
    val sqlContext = new org.apache.spark.sql.SQLContext(sc)
    for (i <- 0 to 100 - 1) {
      // Sample values; in the real job these come from business logic.
      // The loop stands in for that logic and pushes each row into the table.
      var fstring = "fstring" + i
      var cmd = "cmd" + i
      var idpath = "idpath" + i
      import sqlContext.implicits._
      val sDF = Seq((fstring, cmd, idpath)).toDF("t_als_s_path", "t_als_s_cmd", "t_als_s_pd")
      sDF.write.insertInto("l_sequence")
      //sDF.write.format("orc").saveAsTable("l_sequence")
      println("write data ==> " + i)
    }
  }
}
It gives the following error:
Exception in thread "main" org.apache.spark.sql.AnalysisException: Table or view not found: l_sequence;
at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveRelations$$lookupTableFromCatalog(Analyzer.scala:449)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:455)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$8.applyOrElse(Analyzer.scala:453)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:60)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:453)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:443)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:85)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:82)
at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:124)
at scala.collection.immutable.List.foldLeft(List.scala:84)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:82)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:74)
at scala.collection.immutable.List.foreach(List.scala:381)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:74)
at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:65)
at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:63)
at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:51)
at org.apache.spark.sql.execution.QueryExecution.withCachedData$lzycompute(QueryExecution.scala:69)
at org.apache.spark.sql.execution.QueryExecution.withCachedData(QueryExecution.scala:68)
at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:74)
at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:74)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:78)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:76)
at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:83)
at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:83)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:86)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:86)
at org.apache.spark.sql.DataFrameWriter.insertInto(DataFrameWriter.scala:259)
at org.apache.spark.sql.DataFrameWriter.insertInto(DataFrameWriter.scala:239)
at com.hq.bds.Helloword$$anonfun$main$1.apply$mcVI$sp(Helloword.scala:16)
at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)
at com.hq.bds.Helloword$.main(Helloword.scala:10)
at com.hq.bds.Helloword.main(Helloword.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:729)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:185)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:210)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:124)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Answer 0 (score: 0)
You need to link or copy hive-site.xml into the Spark conf directory. Spark is not able to find your Hive metastore (by default it falls back to an embedded Derby database), so the Hive configuration must be made visible to Spark.
In short, to connect Spark SQL to an existing Hive installation, you must copy the hive-site.xml file into Spark's configuration directory ($SPARK_HOME/conf). If you do not have an existing Hive installation, Spark SQL will still run.
Switch to the root user with sudo, then copy hive-site.xml into the Spark conf directory:
sudo -u root
cp /etc/hive/conf/hive-site.xml /etc/spark/conf
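Besides copying the configuration, it also matters that the application itself is started with Hive support, since a plain SQLContext does not consult the Hive metastore. Below is a minimal sketch (not the poster's original code) assuming Spark 2.x, where SparkSession.builder().enableHiveSupport() is used so that insertInto can resolve the l_sequence table; the table and column names are taken from the question.

import org.apache.spark.sql.SparkSession

object TestCodeHive {
  def main(args: Array[String]): Unit = {
    // enableHiveSupport() makes Spark use the Hive metastore described by hive-site.xml
    val spark = SparkSession.builder()
      .appName("first example")
      .master("local")
      .enableHiveSupport()
      .getOrCreate()

    import spark.implicits._

    for (i <- 0 until 100) {
      // Sample values standing in for the real business logic, as in the question
      val sDF = Seq(("fstring" + i, "cmd" + i, "idpath" + i))
        .toDF("t_als_s_path", "t_als_s_cmd", "t_als_s_pd")
      // Resolves l_sequence through the Hive metastore and appends the row
      sDF.write.insertInto("l_sequence")
      println("write data ==> " + i)
    }

    spark.stop()
  }
}

When this jar is run with spark-submit on a machine where hive-site.xml sits in $SPARK_HOME/conf, the table lookup should no longer fail with "Table or view not found".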