我有以下火花配置:
return SparkSession.builder \
.master("yarn") \
.appName("my_application) \
.config("spark.driver.bindAddress", "0.0.0.0") \
.config("spark.driver.host", host) \
.config("spark.driver.port", port) \
.config("spark.driver.memory", "4G") \
.config("spark.executor.memory", "8G") \
.config("spark.blockManager.port", port_block_manager) \
.config("spark.driver.allowMultipleContexts", "true") \
.enableHiveSupport() \
.getOrCreate()
从spark我加载一个数据帧到pandas。我在微服务中使用它来向前端提供数据。当只有一个请求时,一切正常,但如果两个不同的前端同时请求相同的东西我会收到错误:
17/09/21 13:04:56 ERROR spark.SparkContext: Error initializing SparkContext.
org.apache.spark.SparkException: Yarn application has already ended! It might have been killed or unable to launch application master.
at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.waitForApplication(YarnClientSchedulerBackend.scala:85)
at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:62)
at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:173)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:509)
at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:236)
at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)
17/09/21 13:04:56 WARN cluster.YarnSchedulerBackend$YarnSchedulerEndpoint: Attempted to request executors before the AM has registered!
17/09/21 13:04:56 WARN metrics.MetricsSystem: Stopping a MetricsSystem that is not running
Traceback (most recent call last):
File "__service__.py", line 52, in <module>
main()
File "__service__", line 113, in get_pos_enriched
spark = get_spark_session("adjusted_sales.temp_data_read")
File "__service__", line 61, in get_spark_session
.config("spark.driver.allowMultipleContexts", "true") \
File "/usr/local/spark/python/pyspark/sql/session.py", line 169, in getOrCreate
sc = SparkContext.getOrCreate(sparkConf)
File "/usr/local/spark/python/pyspark/context.py", line 334, in getOrCreate
SparkContext(conf=conf or SparkConf())
File "/usr/local/spark/python/pyspark/context.py", line 118, in __init__
conf, jsc, profiler_cls)
File "/usr/local/spark/python/pyspark/context.py", line 180, in _do_init
self._jsc = jsc or self._initialize_context(self._conf._jconf)
File "/usr/local/spark/python/pyspark/context.py", line 273, in _initialize_context
return self._jvm.JavaSparkContext(jconf)
File "/usr/local/lib/python2.7/dist-packages/py4j/java_gateway.py", line 1428, in __call__
answer, self._gateway_client, None, self._fqn)
File "/usr/local/lib/python2.7/dist-packages/py4j/protocol.py", line 320, in get_return_value
format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling None.org.apache.spark.api.java.JavaSparkContext.
: org.apache.spark.SparkException: Yarn application has already ended! It might have been killed or unable to launch application master.
at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.waitForApplication(YarnClientSchedulerBackend.scala:85)
at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:62)
at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:173)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:509)
at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:236)
at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)
我认为设置spark.driver.allowMultipleContexts = true
会成功,但显然不是。如何才能使SparkSession
独立于使用量?我可以为每次使用创建单独的会话或上下文吗?