I'm a beginner in big data working with spark-scala. I use DataFrames, and to keep things clear I split my code across several Scala objects, each with a main method that runs its part. The first Scala object loads the files into a DataFrame; the other objects do the statistical computations. Here is some code from the first one:
import java.io.File
import scala.util.Try

import org.apache.spark.SparkContext
import org.apache.spark.sql.{SQLContext, SaveMode, SparkSession}

object LoadFiles {
  // Case class describing one parsed triple
  case class T(X: Option[String], P: Option[String], Y: Option[String])

  def main(args: Array[String]) {
    println("Load File 1 into dataframe")
    val sc = new SparkContext("local[*]", "LoadFiles1")
    val sqlContext = new SQLContext(sc)
    val warehouseLocation = new File("spark-warehouse").getAbsolutePath
    val spark = SparkSession
      .builder()
      .appName("Spark Hive Example")
      .config("spark.sql.warehouse.dir", warehouseLocation)
      .enableHiveSupport()
      .getOrCreate()
    import sqlContext.implicits._

    // Try(...).toOption yields None for lines with fewer than three fields
    val dataframe1 = sc.textFile("file1.ttl")
      .map(_.split(" |\\ . "))
      .map(p => T(Try(p(0)).toOption, Try(p(1)).toOption, Try(p(2)).toOption))
      .toDF()

    dataframe1
      .write
      .partitionBy("P") // the DataFrame's columns are X, P and Y
      .mode(SaveMode.Overwrite)
      .saveAsTable("dataframe1")
  }
}
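For reference, my understanding is that in Spark 2.x a single SparkSession is meant to replace the separate SparkContext/SQLContext pair, so everything goes through one entry point. A minimal sketch of that pattern (the object name LoadFilesSingleSession is just for illustration; I haven't verified this is the fix):

import java.io.File
import scala.util.Try
import org.apache.spark.sql.{SaveMode, SparkSession}

// Hypothetical single-session variant of LoadFiles, for comparison only
object LoadFilesSingleSession {
  case class T(X: Option[String], P: Option[String], Y: Option[String])

  def main(args: Array[String]) {
    val warehouseLocation = new File("spark-warehouse").getAbsolutePath
    // One SparkSession is the single entry point; the SparkContext and the
    // SQL/Hive catalog come from it instead of being created separately
    val spark = SparkSession
      .builder()
      .appName("LoadFiles1")
      .master("local[*]")
      .config("spark.sql.warehouse.dir", warehouseLocation)
      .enableHiveSupport()
      .getOrCreate()
    import spark.implicits._

    spark.sparkContext.textFile("file1.ttl")
      .map(_.split(" |\\ . "))
      .map(p => T(Try(p(0)).toOption, Try(p(1)).toOption, Try(p(2)).toOption))
      .toDF()
      .write
      .partitionBy("P")
      .mode(SaveMode.Overwrite)
      .saveAsTable("dataframe1")
  }
}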
The other Scala objects run many computations over the loaded DataFrame and create further DataFrames. Here is the second one:
import java.io.File

import org.apache.spark.SparkContext
import org.apache.spark.sql.{SQLContext, SaveMode, SparkSession}

object Statistics1 {
  def main(args: Array[String]) {
    val sc = new SparkContext("local[*]", "Statistics1")
    val sqlContext = new SQLContext(sc)
    val warehouseLocation = new File("spark-warehouse").getAbsolutePath
    val spark = SparkSession
      .builder()
      .appName("Spark Hive Example")
      .config("spark.sql.warehouse.dir", warehouseLocation)
      .enableHiveSupport()
      .getOrCreate()
    import sqlContext.implicits._

    // subject query: count objects per (subject, predicate) pair
    spark.sql("SELECT X As Res, P as Pred, COUNT(Y) As nbr FROM dataframe1 GROUP BY X, P")
      .write
      .mode(SaveMode.Overwrite)
      .saveAsTable("stat1")
  }
}
I get the error Exception in thread "main" org.apache.spark.sql.AnalysisException: Table or view not found: dataframe1; line 1 pos 75
How can I fix this?
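As a first debugging step, I could list the tables the second program's session actually sees; a minimal sketch (assuming, as I understand it, that both programs must point at the same spark.sql.warehouse.dir and metastore for one to read the other's saved tables):

// Inside Statistics1, right after getOrCreate():
// show every table registered in the catalog this session is using
spark.catalog.listTables().show(false)
// equivalent SQL form
spark.sql("SHOW TABLES").show()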