I want to connect to HBase from Spark, and I get an exception. When I try to do the same thing with Scala directly, I do not get this error.
I am using Scala 2.10.4 and Spark 1.3.0 on CDH 5.4.0.
The code is:
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.client.Put
import org.apache.spark.SparkConf
import com.cloudera.spark.hbase.HBaseContext

object HBaseBulkPutExample {
  def main(args: Array[String]) {
    val tableName = "image_table"
    val columnFamily = "Image Data"

    val sparkConf = new SparkConf().setAppName("HBaseBulkPutExample " + tableName + " " + columnFamily)
    val sc = new SparkContext(sparkConf)

    val rdd = sc.parallelize(Array(
      (Bytes.toBytes("1"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("1")))),
      (Bytes.toBytes("2"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("2")))),
      (Bytes.toBytes("3"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("3")))),
      (Bytes.toBytes("4"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("4")))),
      (Bytes.toBytes("5"), Array((Bytes.toBytes(columnFamily), Bytes.toBytes("1"), Bytes.toBytes("5"))))
    ))

    val conf = HBaseConfiguration.create()
    conf.addResource(new Path("/eds/servers//hbase-1.0.1.1/conf/hbase-site.xml"))

    val hbaseContext = new HBaseContext(sc, conf)

    hbaseContext.bulkPut[(Array[Byte], Array[(Array[Byte], Array[Byte], Array[Byte])])](rdd,
      tableName,
      (putRecord) => {
        val put = new Put(putRecord._1)
        putRecord._2.foreach((putValue) => put.add(putValue._1, putValue._2, putValue._3))
        put
      },
      true)
  }
}
When I build a jar and run it, I get the following error:
org.apache.hadoop.hbase.DoNotRetryIOException: java.lang.NoSuchMethodError: org.apache.hadoop.net.NetUtils.getInputStream(Ljava/net/Socket;)Lorg/apache/hadoop/net/SocketInputWrapper;
at org.apache.hadoop.hbase.ipc.RpcClient$Connection.setupIOstreams(RpcClient.java:928)
at org.apache.hadoop.hbase.ipc.RpcClient.getConnection(RpcClient.java:1543)
at org.apache.hadoop.hbase.ipc.RpcClient.call(RpcClient.java:1442)
at org.apache.hadoop.hbase.ipc.RpcClient.callBlockingMethod(RpcClient.java:1661)
at org.apache.hadoop.hbase.ipc.RpcClient$BlockingRpcChannelImplementation.callBlockingMethod(RpcClient.java:1719)
at org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$BlockingStub.get(ClientProtos.java:30304)
at org.apache.hadoop.hbase.protobuf.ProtobufUtil.getRowOrBefore(ProtobufUtil.java:1562)
at org.apache.hadoop.hbase.client.HTable$2.call(HTable.java:711)
at org.apache.hadoop.hbase.client.HTable$2.call(HTable.java:709)
at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithRetries(RpcRetryingCaller.java:114)
at org.apache.hadoop.hbase.client.HTable.getRowOrBefore(HTable.java:715)
at org.apache.hadoop.hbase.client.MetaScanner.metaScan(MetaScanner.java:144)
at org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation.prefetchRegionCache(HConnectionManager.java:1140)
Answer 0 (score: 0)
I'm not sure what exactly causes the error. It looks like the method being called does not match the versions in your environment: a NoSuchMethodError at runtime usually means the Hadoop/HBase jars on the cluster classpath were built against a different version than the ones your application was compiled with.
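One thing worth checking, as a sketch rather than a confirmed fix: build against the artifacts that CDH 5.4.0 itself ships and mark them as provided, so the cluster's own jars are the ones used at runtime. The -cdh5.4.0 version strings below are assumptions and should be verified against your cluster, assuming you build with sbt:

// build.sbt -- a sketch; the exact CDH version strings are assumptions,
// check them against what your CDH 5.4.0 cluster actually ships.
scalaVersion := "2.10.4"

resolvers += "cloudera" at "https://repository.cloudera.com/artifactory/cloudera-repos/"

libraryDependencies ++= Seq(
  "org.apache.spark"  %% "spark-core"    % "1.3.0-cdh5.4.0" % "provided",
  "org.apache.hadoop" %  "hadoop-client" % "2.6.0-cdh5.4.0" % "provided",
  "org.apache.hbase"  %  "hbase-client"  % "1.0.0-cdh5.4.0" % "provided",
  "org.apache.hbase"  %  "hbase-server"  % "1.0.0-cdh5.4.0" % "provided"
)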
Here is an example of how to connect to HBase from Spark:

import org.apache.spark._
import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor}
import org.apache.hadoop.hbase.client.HBaseAdmin
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat

...

val tableName = "image_table"

val conf = HBaseConfiguration.create()
conf.set(TableInputFormat.INPUT_TABLE, tableName)

// Create the table if it does not exist yet
val admin = new HBaseAdmin(conf)
if (!admin.isTableAvailable(tableName)) {
  val tableDesc = new HTableDescriptor(tableName)
  admin.createTable(tableDesc)
}

// Read the table as an RDD of (row key, row result) pairs
val rdd = sc.newAPIHadoopRDD(
  conf, classOf[TableInputFormat], classOf[ImmutableBytesWritable], classOf[Result])
The Result class provides various methods for extracting the values you need.
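For example, a minimal sketch of pulling one cell out of each row of that RDD; the column family and qualifier used here ("Image Data" / "1") are assumptions taken from the puts in the question, not a known table layout:

import org.apache.hadoop.hbase.util.Bytes

// Map each (row key, Result) pair to (row key as String, cell value as String).
// The family/qualifier are only an assumption based on the puts shown above.
val values = rdd.map { case (key, result) =>
  val rowKey = Bytes.toString(key.get())
  val value = Option(result.getValue(Bytes.toBytes("Image Data"), Bytes.toBytes("1")))
    .map(Bytes.toString)
    .getOrElse("")
  (rowKey, value)
}

values.collect().foreach { case (k, v) => println(s"$k -> $v") }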