I have been struggling with this problem.

Step 1: I create a Hive table and load data into it as follows:
create external table if not exists productstorehtable2
(
device string,
date string,
word string,
count int
)
row format delimited fields terminated by ','
location 'hdfs://quickstart.cloudera:8020/user/cloudera/hadoop/hive/warehouse/VerizonProduct2';
LOAD DATA INPATH 'hdfs://quickstart.cloudera:8020/user/cloudera/hadoop/input/productstore' INTO TABLE productstorehtable2;
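To rule out the data itself, one hypothetical sanity check is to read the raw files straight from the external table's HDFS location (a minimal sketch, reusing the sparkcontext defined in the Step 2 script below):

// Hypothetical check: read the raw CSV files directly from the table's
// location and print a few records to confirm the LOAD actually landed there
val raw = sparkcontext.textFile(
  "hdfs://quickstart.cloudera:8020/user/cloudera/hadoop/hive/warehouse/VerizonProduct2")
raw.take(5).foreach(println)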
Step 2: I wrote a simple Spark script as a sanity check:
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext, Time}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.sql.SQLContext
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext
import org.apache.spark
import org.apache.spark.sql.hive._
import org.apache.log4j.{Level, Logger}
import java.util.regex.Pattern
import java.util.regex.Matcher
//import Utilities._

object HivePortStreamer {

  def readFromHiveTable(hivecontext: org.apache.spark.sql.hive.HiveContext) = {
    import hivecontext.implicits._
    //val productDF = hivecontext.sql("select * from productstorehtable2")
    //productDF.show()
    println("PRINTING THE HIVE TABLES")
    // show() prints the result rows; println on the DataFrame itself
    // would only print its schema string
    hivecontext.sql("show tables").show()
  }

  def main(args: Array[String]) {
    val rootLogger = Logger.getRootLogger()
    rootLogger.setLevel(Level.ERROR)

    val conf = new SparkConf().setAppName("hivePortStreamer").setMaster("local[*]")
      .set("spark.sql.warehouse.dir", "hdfs://quickstart.cloudera:8020/user/cloudera/hadoop/hive/warehouse/VerizonProduct2")

    // Create the context with a 1 second batch size
    //val ssc = new StreamingContext(conf, Seconds(1))
    val sparkcontext = new SparkContext(conf)
    val hivecontext = new org.apache.spark.sql.hive.HiveContext(sparkcontext)

    readFromHiveTable(hivecontext)
    sparkcontext.stop()
  }
}
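Since this is Spark 2.0 (where HiveContext is deprecated), the same check can also be written against the SparkSession API. A minimal sketch, assuming the QuickStart VM's hive-site.xml is on Spark's classpath:

import org.apache.spark.sql.SparkSession

// Sketch only: enableHiveSupport() makes the session talk to the Hive
// metastore instead of a local in-memory catalog
val spark = SparkSession.builder()
  .appName("hivePortStreamer")
  .master("local[*]")
  .enableHiveSupport()
  .getOrCreate()

spark.sql("show tables").show()
spark.stop()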
When I run this script, it just shows a blank table list. I don't understand why; I have given the correct warehouse directory location. The same thing happens with the 'show databases' command.

Is there a problem with how Spark and Hive are configured on my system?
Edit 1: Spark cannot discover the Hive tables. I tried the command

println(hivecontext.sql("create table dummytable(id int)"))

and it creates the Hive table as expected.
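One way to see where that dummy table actually ends up (and therefore which metastore and warehouse directory Spark is really using) would be to inspect its metadata; a sketch:

// Hypothetical check: the "Location" field in the output reveals which
// warehouse directory the table was created under
hivecontext.sql("describe formatted dummytable").collect().foreach(println)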
Please help.

Thanks.

Background: CentOS, Cloudera QuickStart VM, Spark 2.0