我有一个 Spark 结构化流(Structured Streaming)程序,我想把配置属性从外部文件传入程序。
我尝试使用 Typesafe Config,但程序在运行时读取不到外部属性文件(抛出配置缺失异常),而且我不确定这是否是正确的做法。
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, _}
import org.apache.spark.sql.streaming.Trigger
import com.typesafe.config._
object OSB_kafkaToSpark {

  /** Reads a string from `conf` at `path`, falling back to `default`
    * when the key is absent. Keeps the job backward-compatible with a
    * properties file that only defines `executionMode`.
    */
  private def stringOr(conf: Config, path: String, default: String): String =
    if (conf.hasPath(path)) conf.getString(path) else default

  /** Entry point for the Kafka -> JSON structured-streaming job.
    *
    * Expected arguments:
    *   args(0) - name of the config section to read (e.g. "dev", "prod")
    *   args(1) - suffix appended to the output path
    *
    * `ConfigFactory.load()` resolves application.conf / application.properties
    * from the classpath, or an external file supplied with
    * `-Dconfig.file=<path>` on spark2-submit.
    */
  def main(args: Array[String]): Unit = {
    require(args.length >= 2,
      "usage: OSB_kafkaToSpark <configSection> <outputPathSuffix>")

    val props = ConfigFactory.load()
    // Single lookup of the environment-specific section; the original code
    // repeated props.getConfig(args(0)) even though envProps already held it.
    val envProps = props.getConfig(args(0))

    val spark = SparkSession
      .builder()
      .appName("OSB_kafkaToSpark")
      .master(envProps.getString("executionMode"))
      .getOrCreate()

    import spark.implicits._

    // Kafka source settings come from the config section when present;
    // the defaults reproduce the previously hard-coded values.
    val df = spark
      .readStream
      .format("kafka")
      .option("kafka.bootstrap.servers",
        stringOr(envProps, "bootstrapServers", "somekafkahost:9092"))
      .option("subscribe", stringOr(envProps, "topic", "test_test"))
      .option("startingOffsets", "latest")
      .option("failOnDataLoss", "false")
      .load()

    // Keep only the message payload as a string column.
    val dfs = df.selectExpr("CAST(value AS STRING)")

    val pathstring =
      stringOr(envProps, "outputPath", "/user/svc_npeishanzo/testing")
        .concat(args(1))

    // awaitTermination() returns Unit, so binding the chain to a `query`
    // val (as the original did) captured nothing useful; run it directly.
    // Trigger.Once() processes all available data once, then stops.
    dfs.writeStream
      .format("json")
      .option("path", pathstring)
      .option("checkpointLocation",
        stringOr(envProps, "checkpointLocation", "/user/checkpoint"))
      .outputMode("append")
      .trigger(Trigger.Once())
      .start()
      .awaitTermination()
  }
}
我想从一个 .properties 文件传入下列参数(例如输出路径 pathstring),并且该属性文件应当在 spark2-submit 时指定(例如通过 --files 分发文件,并用 -Dconfig.file 告诉 Typesafe Config 去读它):
spark2-submit --class OSB_kafkaToSpark --master yarn --deploy-mode client --num-executors 8 --executor-memory 4g --files /home/application.properties --driver-java-options "-Dconfig.file=application.properties" /home/streaming_osb_2.11-0.1.0-SNAPSHOT.jar dev /run1