I am trying to write data into HBase from a Spark DataFrame using foreach, and I am hitting an error:
import java.io.File
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.{TableInputFormat, TableOutputFormat}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.sql.{DataFrame, Row, SparkSession}

def main(args: Array[String]): Unit = {
  val warehouseLocation = new File("warehouse").getAbsolutePath
  val spark = SparkSession.builder().appName("Spark Hive").config("spark.sql.warehouse.dir", warehouseLocation).enableHiveSupport().getOrCreate()

  // HBase connection settings (the ZooKeeper quorum is a placeholder)
  val config = HBaseConfiguration.create()
  config.set("hbase.zookeeper.quorum", "zookeeperIP address")
  config.set("hbase.zookeeper.property.clientPort", "2181")
  config.set(TableInputFormat.INPUT_TABLE, "june_poc_hbase_table")

  // Configure a new Hadoop API job to write Puts to the HBase table
  val newAPIJobConfiguration1 = Job.getInstance(config)
  newAPIJobConfiguration1.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "june_poc_hbase_table")
  newAPIJobConfiguration1.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])

  import spark.implicits._
  val df: DataFrame = Seq(("foo", "1", "foo1"), ("bar", "2", "bar1")).toDF("key", "value1", "value2")

  // Convert each row into an HBase Put keyed by the first column
  val hbasePuts = df.rdd.map((row: Row) => {
    val put = new Put(Bytes.toBytes(row.getString(0)))
    put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("value1"), Bytes.toBytes(row.getString(1)))
    put.addColumn(Bytes.toBytes("cf2"), Bytes.toBytes("value2"), Bytes.toBytes(row.getString(2)))
    (new ImmutableBytesWritable(), put)
  })

  hbasePuts.saveAsNewAPIHadoopDataset(newAPIJobConfiguration1.getConfiguration())
}
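
Since the post mentions foreach, here is a hedged sketch of that alternative route: opening an HBase Connection inside each partition and issuing the Puts directly with the standard HBase client API, instead of going through TableOutputFormat. The table name, column families, and ZooKeeper settings mirror the snippet above; the helper name writeWithForeachPartition and the per-partition connection handling are assumptions for illustration, not code from the original post.

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Put}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.DataFrame

// Hypothetical helper: write the same DataFrame with foreachPartition instead of TableOutputFormat
def writeWithForeachPartition(df: DataFrame): Unit = {
  df.rdd.foreachPartition { rows =>
    // Build the configuration on the executor; Hadoop Configuration objects are not serializable
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "zookeeperIP address")
    conf.set("hbase.zookeeper.property.clientPort", "2181")

    // One connection and table handle per partition, closed when the partition is done
    val connection = ConnectionFactory.createConnection(conf)
    val table = connection.getTable(TableName.valueOf("june_poc_hbase_table"))
    try {
      rows.foreach { row =>
        val put = new Put(Bytes.toBytes(row.getString(0)))
        put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("value1"), Bytes.toBytes(row.getString(1)))
        put.addColumn(Bytes.toBytes("cf2"), Bytes.toBytes("value2"), Bytes.toBytes(row.getString(2)))
        table.put(put)
      }
    } finally {
      table.close()
      connection.close()
    }
  }
}

Writing through TableOutputFormat as in the original snippet is also a common pattern; the foreachPartition route just trades the Hadoop job setup for explicit connection handling on the executors.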