我正在尝试使用 Spark HortonWorks Connector(SHC)将数据导入 HBase 数据库。这是我的脚本:
import org.apache.spark.sql.execution.datasources.hbase.HBaseTableCatalog
import org.apache.spark.sql.DataFrame
/** Reads a CSV file into a DataFrame and writes it to HBase through the
  * Spark HortonWorks Connector (SHC).
  */
object Simple {

  /** Entry point: loads the CSV, renames its columns and saves the rows to
    * the HBase table described by the SHC catalog below.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // Local import keeps this block self-contained; a compiled application
    // has no predefined `spark` value (only spark-shell provides one).
    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().appName("Simple").getOrCreate()

    try {
      val df = spark.read.csv("/home/hung/Desktop/data/100mils.csv")

      // toDF requires exactly as many names as the CSV has columns.
      val newNames = Seq("id", "c1", "c2", "c3", "c4", "c5")
      val dfRenamed = df.toDF(newNames: _*)

      // SHC catalog: every key under "columns" must be the name of a column
      // in the DataFrame being written, so the keys mirror `newNames`.
      // The "id" column is stored as the HBase row key.
      // NOTE(review): the original keys were c2..c6, which left c1 unmapped
      // and referenced a non-existent c6 — confirm this column-to-qualifier
      // mapping is the intended one.
      val catalog =
        """{
          |"table":{"namespace":"default", "name":"table1"},
          |"rowkey":"key",
          |"columns":{
          |"id":{"cf":"rowkey", "col":"key", "type":"string"},
          |"c1":{"cf":"science", "col":"math", "type":"string"},
          |"c2":{"cf":"science", "col":"physics", "type":"string"},
          |"c3":{"cf":"science", "col":"chemistry", "type":"string"},
          |"c4":{"cf":"language", "col":"english", "type":"string"},
          |"c5":{"cf":"language", "col":"chinese", "type":"string"}
          |}
          |}""".stripMargin

      dfRenamed.write
        .options(Map(
          HBaseTableCatalog.tableCatalog -> catalog,
          // Passed through to SHC as the `newTable` option.
          HBaseTableCatalog.newTable -> "5"
        ))
        .format("org.apache.spark.sql.execution.datasources.hbase")
        .save()
    } finally {
      // Release the Spark session even if the read or write fails.
      spark.stop()
    }
  }
}
这是我尝试编译代码时遇到的错误:
import org.apache.spark.sql.execution.datasources.hbase.HBaseTableCatalog
import org.apache.spark.sql.DataFrame
/** Reads a CSV file into a DataFrame and writes it to HBase through the
  * Spark HortonWorks Connector (SHC).
  */
object Simple {

  /** Entry point: loads the CSV, renames its columns and saves the rows to
    * the HBase table described by the SHC catalog below.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // Local import keeps this block self-contained; a compiled application
    // has no predefined `spark` value (only spark-shell provides one).
    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().appName("Simple").getOrCreate()

    try {
      val df = spark.read.csv("/home/hung/Desktop/data/100mils.csv")

      // toDF requires exactly as many names as the CSV has columns.
      val newNames = Seq("id", "c1", "c2", "c3", "c4", "c5")
      val dfRenamed = df.toDF(newNames: _*)

      // SHC catalog: every key under "columns" must be the name of a column
      // in the DataFrame being written, so the keys mirror `newNames`.
      // The "id" column is stored as the HBase row key.
      // NOTE(review): the original keys were c2..c6, which left c1 unmapped
      // and referenced a non-existent c6 — confirm this column-to-qualifier
      // mapping is the intended one.
      val catalog =
        """{
          |"table":{"namespace":"default", "name":"table1"},
          |"rowkey":"key",
          |"columns":{
          |"id":{"cf":"rowkey", "col":"key", "type":"string"},
          |"c1":{"cf":"science", "col":"math", "type":"string"},
          |"c2":{"cf":"science", "col":"physics", "type":"string"},
          |"c3":{"cf":"science", "col":"chemistry", "type":"string"},
          |"c4":{"cf":"language", "col":"english", "type":"string"},
          |"c5":{"cf":"language", "col":"chinese", "type":"string"}
          |}
          |}""".stripMargin

      dfRenamed.write
        .options(Map(
          HBaseTableCatalog.tableCatalog -> catalog,
          // Passed through to SHC as the `newTable` option.
          HBaseTableCatalog.newTable -> "5"
        ))
        .format("org.apache.spark.sql.execution.datasources.hbase")
        .save()
    } finally {
      // Release the Spark session even if the read or write fails.
      spark.stop()
    }
  }
}
我遵循的是 github.com/hortonworks-spark/shc 中的指南。这是我第一次使用 Scala,因为我的最终项目要用到 HBase。