我在EMR群集上运行了HBase,并尝试使用本地计算机中的Spark访问其中的表。
程序看起来已经成功连接到 ZooKeeper,但却无法判断我要查找的表是否存在。
这是我的代码,以及hbase-site.xml文件和我得到的消息。
package org.apache.spark.examples
import org.apache.hadoop.fs.Path
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.HBaseAdmin
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark._
/**
 * Connectivity smoke test: starts a local Spark context, loads the HBase
 * client configuration from a fixed `hbase-site.xml` path and reports whether
 * the table `empl` is available.
 *
 * Progress markers ("-------------1" .. "-------------3") are printed so the
 * step at which the client blocks can be seen in the console output.
 */
object HBaseTestEMR {

  /**
   * Entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("HBaseTest").setMaster("local[4]")
    val sc = new SparkContext(sparkConf)
    try {
      val conf = HBaseConfiguration.create()
      val tableName = "empl"
      // Layer the cluster's client settings on top of the defaults.
      conf.addResource(new Path("/home/spark/development/hbase/conf/hbase-site.xml"))
      conf.set(TableInputFormat.INPUT_TABLE, tableName)
      println("-------------1")

      val admin = new HBaseAdmin(conf)
      try {
        // Uncomment to dump the table descriptors known to the cluster:
        // println(admin.listTables())
        println("-------------2")
        // NOTE(review): this call may block for a long time if the servers
        // advertised through ZooKeeper are not reachable from this machine
        // -- confirm network access to the cluster nodes.
        val status =
          if (admin.isTableAvailable(tableName)) "la table existe"
          else "la table n'existe pas"
        println(status)
        println("-------------3")
      } finally {
        // Release the admin's connection / ZooKeeper session even on failure.
        admin.close()
      }
    } finally {
      // Always shut Spark down, including when an HBase call throws.
      sc.stop()
    }
  }
}
hbase-site.xml 文件的内容:
<!-- HBase client/site settings loaded by the test program via conf.addResource(...). -->
<configuration>
<!-- Implementation class bound to the hdfs:// scheme. -->
<property><name>fs.hdfs.impl</name><value>emr.hbase.fs.BlockableFileSystem</value></property>
<property><name>hbase.regionserver.handler.count</name><value>100</value></property>
<!-- ZooKeeper host the client connects to (public EC2 DNS name; it matches the connectString in the log). -->
<property><name>hbase.zookeeper.quorum</name><value>ec2-52-26-***-***.us-west-2.compute.amazonaws.com</value></property>
<!-- NOTE(review): 10.0.0.25 is a private (RFC 1918) address; presumably not reachable from a machine outside the cluster network. Confirm. -->
<property><name>hbase.rootdir</name><value>hdfs://10.0.0.25:9000/hbase</value></property>
<property><name>hbase.cluster.distributed</name><value>true</value></property>
<property><name>hbase.tmp.dir</name><value>/mnt/var/lib/hbase/tmp-data</value></property>
</configuration>
以下是我得到的输出信息:
15/06/10 12:00:28 INFO ZooKeeper: Client environment:java.io.tmpdir=/tmp
15/06/10 12:00:28 INFO ZooKeeper: Client environment:java.compiler=<NA>
15/06/10 12:00:28 INFO ZooKeeper: Client environment:os.name=Linux
15/06/10 12:00:28 INFO ZooKeeper: Client environment:os.arch=amd64
15/06/10 12:00:28 INFO ZooKeeper: Client environment:os.version=3.2.0-67-generic
15/06/10 12:00:28 INFO ZooKeeper: Client environment:user.name=spark
15/06/10 12:00:28 INFO ZooKeeper: Client environment:user.home=/home/spark
15/06/10 12:00:28 INFO ZooKeeper: Client environment:user.dir=/home/spark/projetWordCount
15/06/10 12:00:28 INFO ZooKeeper: Initiating client connection, connectString=ec2-52-26-***-***.us-west-2.compute.amazonaws.com:2181 sessionTimeout=90000 watcher=hconnection-0x7ecf3c090x0, quorum=ec2-52-26-***-***.us-west-2.compute.amazonaws.com:2181, baseZNode=/hbase
15/06/10 12:00:28 INFO ClientCnxn: Opening socket connection to server ec2-52-26-***-***.us-west-2.compute.amazonaws.com/52.26.***.***:2181. Will not attempt to authenticate using SASL (unknown error)
15/06/10 12:00:28 INFO ClientCnxn: Socket connection established to ec2-52-26-***-***.us-west-2.compute.amazonaws.com/52.26.***.***:2181, initiating session
15/06/10 12:00:28 INFO ClientCnxn: Session establishment complete on server ec2-52-26-***-***.us-west-2.compute.amazonaws.com/52.26.***.***:2181, sessionid = 0x14ddc7d70ed0023, negotiated timeout = 90000
-------------2
之后程序就一直挂起,没有任何后续输出。
那么,我想做的事情(从本地的 Spark 访问 EMR 上的 HBase)是否可行?我的配置中哪一部分有问题?