我想在提交 Spark 作业时加载一个属性配置文件,以便根据不同的环境(例如测试环境或生产环境)加载正确的配置。但是我不知道应该把属性文件放在哪里。下面是加载属性文件的代码:
/**
 * Helper for creating an RDD over an HBase table.
 *
 * The ZooKeeper quorum and the timeout are read from a file named
 * `hbase.properties`; when the file cannot be read, or a key is absent,
 * the defaults declared below are used instead.
 */
object HbaseRDD {
  /** Quorum used when `hbase.properties` is unreadable or lacks `hbase.zookeeper.quorum`. */
  val QUORUM_DEFAULT = "172.16.1.10,172.16.1.11,172.16.1.12"

  /** Timeout used when `hbase.properties` is unreadable or lacks `timeout`. */
  val TIMEOUT_DEFAULT = "120000"

  /**
   * `(quorum, timeout)` loaded from `hbase.properties`, or the failure raised while
   * reading it. The path is relative, so it is resolved against the JVM's current
   * working directory, and the file is read once, when this object is initialised.
   */
  val config = Try {
    val prop = new Properties()
    val in = new FileInputStream("hbase.properties")
    // Properties.load leaves the stream open, so close it explicitly.
    try prop.load(in) finally in.close()
    (
      prop.getProperty("hbase.zookeeper.quorum", QUORUM_DEFAULT),
      prop.getProperty("timeout", TIMEOUT_DEFAULT)
    )
  }

  /**
   * Creates a new SparkContext and returns an RDD that scans `tableName`.
   *
   * @param tableName name of the HBase table to read
   * @param appName   Spark application name
   * @param master    Spark master URL
   * @return the RDD produced by `newAPIHadoopRDD` with `TableInputFormat`,
   *         keyed by `ImmutableBytesWritable` with `Result` values
   */
  def getHbaseRDD(tableName: String, appName:String = "test", master:String = "spark://node0:7077") = {
    val sparkConf = new SparkConf().setAppName(appName).setMaster(master)
    val sc = new SparkContext(sparkConf)
    val conf = HBaseConfiguration.create()

    // Resolve the effective settings first so each key is written in exactly one place.
    val (quorum, timeout) = config match {
      case Success(loaded) => loaded
      case Failure(ex) =>
        // Best effort: report why the file could not be read, then use the defaults.
        ex.printStackTrace()
        (QUORUM_DEFAULT, TIMEOUT_DEFAULT)
    }
    conf.set("hbase.zookeeper.quorum", quorum)
    conf.set("timeout", timeout)
    conf.set(TableInputFormat.INPUT_TABLE, tableName)

    sc.newAPIHadoopRDD(conf, classOf[TableInputFormat], classOf[ImmutableBytesWritable], classOf[Result])
  }
}
问题是:我应该把 hbase.properties 文件放在哪里,Spark 才能找到并加载它?或者,如何通过 spark-submit 指定它?
答案 0 :(得分:6)
请按照此示例(Spark 1.5)配置:
请查看运行时环境(Run-time Environment)配置。这些配置选项会随 Spark 版本不同而变化,您可以查看对应版本的运行时(runtime)配置文档。
spark-submit --verbose --class <your driver class > \
--master yarn-client \
--num-executors 12 \
--driver-memory 1G \
--executor-memory 2G \
--executor-cores 4 \
--conf "spark.executor.extraJavaOptions=-verbose:gc -XX:+UseSerialGC -XX:+UseCompressedOops -XX:+UseCompressedStrings -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:PermSize=256M -XX:MaxPermSize=512M" \
--conf "spark.driver.extraJavaOptions=-XX:PermSize=256M -XX:MaxPermSize=512M" \
--conf "spark.shuffle.memoryFraction=0.5" \
--conf "spark.worker.cleanup.enabled=true" \
--conf "spark.worker.cleanup.interval=3600" \
--conf "spark.shuffle.io.numConnectionsPerPeer=5" \
--conf "spark.eventlog.enabled=true" \
--conf "spark.driver.extraLibrayPath=$HADOOP_HOME/*:$HBASE_HOME/*:$HADOOP_HOME/lib/*:$HBASE_HOME/lib/htrace-core-3.1.0-incubating.jar:$HDFS_PATH/*:$SOLR_HOME/*:$SOLR_HOME/lib/*" \
--conf "spark.executor.extraLibraryPath=$HADOOP_HOME/*:$folder/*:$HADOOP_HOME/lib/*:$HBASE_HOME/lib/htrace-core-3.1.0-incubating.jar:$HDFS_PATH/*:$SOLR_HOME/*:$SOLR_HOME/lib/*" \
--conf "spark.executor.extraClassPath=$OTHER_JARS:hbase.Properties" \
--conf "spark.yarn.executor.memoryOverhead=2048" \
--conf "spark.yarn.driver.memoryOverhead=1024" \
--conf "spark.eventLog.overwrite=true" \
--conf "spark.shuffle.consolidateFiles=true" \
--conf "spark.akka.frameSize=1024" \
--properties-file yourconfig.conf \
--files hbase.properties \
--jars $your_JARS\
另外,请查看