org.apache.spark.SparkException: Task failed while writing rows when writing data to HBase through Spark

Time: 2019-06-06 16:30:09

Tags: apache-spark apache-spark-sql

I am trying to write data into HBase through a Spark DataFrame using foreach.

Error: org.apache.spark.SparkException: Task failed while writing rows

Code:

import java.io.File

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.{TableInputFormat, TableOutputFormat}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.sql.{DataFrame, Row, SparkSession}

def main(args: Array[String]): Unit = {

    val warehouseLocation = new File("warehouse").getAbsolutePath

    val spark = SparkSession.builder()
      .appName("Spark Hive")
      .config("spark.sql.warehouse.dir", warehouseLocation)
      .enableHiveSupport()
      .getOrCreate()

    // HBase connection settings: ZooKeeper quorum, client port, and source table
    val config = HBaseConfiguration.create()
    config.set("hbase.zookeeper.quorum", "zookeeperIP address")
    config.set("hbase.zookeeper.property.clientPort", "2181")
    config.set(TableInputFormat.INPUT_TABLE, "june_poc_hbase_table")

    // Job configuration for writing through the new Hadoop API TableOutputFormat
    val newAPIJobConfiguration1 = Job.getInstance(config)
    newAPIJobConfiguration1.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "june_poc_hbase_table")
    newAPIJobConfiguration1.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])

    import spark.implicits._
    val df: DataFrame = Seq(("foo", "1", "foo1"), ("bar", "2", "bar1")).toDF("key", "value1", "value2")

    // Convert each row into an HBase Put keyed by the "key" column,
    // with "value1" going to family cf1 and "value2" to family cf2
    val hbasePuts = df.rdd.map((row: Row) => {
      val put = new Put(Bytes.toBytes(row.getString(0)))
      put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("value1"), Bytes.toBytes(row.getString(1)))
      put.addColumn(Bytes.toBytes("cf2"), Bytes.toBytes("value2"), Bytes.toBytes(row.getString(2)))
      (new ImmutableBytesWritable(), put)
    })

    // Write the (key, Put) pairs to HBase via the new Hadoop API
    hbasePuts.saveAsNewAPIHadoopDataset(newAPIJobConfiguration1.getConfiguration())
}
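Note that the Puts above assume the target table june_poc_hbase_table already exists with both column families cf1 and cf2; a missing column family is one common reason a Put batch, and with it the Spark write stage, fails. A minimal sketch of creating the table with the HBase Admin API, assuming the same ZooKeeper settings (CreateHBaseTable is a hypothetical object name, not part of the question):

import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.ConnectionFactory

object CreateHBaseTable {
  def main(args: Array[String]): Unit = {
    val config = HBaseConfiguration.create()
    config.set("hbase.zookeeper.quorum", "zookeeperIP address")
    config.set("hbase.zookeeper.property.clientPort", "2181")

    val connection = ConnectionFactory.createConnection(config)
    val admin = connection.getAdmin
    try {
      val tableName = TableName.valueOf("june_poc_hbase_table")
      if (!admin.tableExists(tableName)) {
        // Both column families used by the Puts must exist on the table
        val descriptor = new HTableDescriptor(tableName)
        descriptor.addFamily(new HColumnDescriptor("cf1"))
        descriptor.addFamily(new HColumnDescriptor("cf2"))
        admin.createTable(descriptor)
      }
    } finally {
      admin.close()
      connection.close()
    }
  }
}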

0 Answers:

There are no answers yet.
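For comparison with the saveAsNewAPIHadoopDataset approach in the question, the foreach-style write path mentioned in the question body might look roughly like the sketch below. This is a minimal sketch, assuming the same table, column families, and ZooKeeper settings; writeWithForeachPartition is a hypothetical helper name, and each partition opens its own HBase connection because connections are not serializable:

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Put}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.DataFrame

// Hypothetical helper: writes a DataFrame to HBase partition by partition.
def writeWithForeachPartition(df: DataFrame): Unit = {
  df.rdd.foreachPartition { rows =>
    // Build the connection inside the partition so nothing non-serializable is shipped to executors
    val config = HBaseConfiguration.create()
    config.set("hbase.zookeeper.quorum", "zookeeperIP address")
    config.set("hbase.zookeeper.property.clientPort", "2181")
    val connection = ConnectionFactory.createConnection(config)
    val mutator = connection.getBufferedMutator(TableName.valueOf("june_poc_hbase_table"))
    try {
      rows.foreach { row =>
        val put = new Put(Bytes.toBytes(row.getString(0)))
        put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("value1"), Bytes.toBytes(row.getString(1)))
        put.addColumn(Bytes.toBytes("cf2"), Bytes.toBytes("value2"), Bytes.toBytes(row.getString(2)))
        mutator.mutate(put)
      }
      mutator.flush()
    } finally {
      mutator.close()
      connection.close()
    }
  }
}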