I am trying to submit a job to YARN on another cluster (EMR) through Marathon, using a Docker container that has the Hadoop and Spark binaries installed and has HADOOP_CONF_DIR and YARN_CONF_DIR set to the correct paths. However, when I run spark-submit, it cannot access the Hadoop file system on EMR. I tried spark.yarn.jars and spark.yarn.archive, but both failed.
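For reference, the environment inside the container looks roughly like this (the Spark path matches the image; the Hadoop conf path is an assumption about my layout, not a required value):

export SPARK_HOME=/opt/spark-2.2.1-bin-hadoop2.7
export HADOOP_CONF_DIR=/opt/hadoop/etc/hadoop   # assumed path; holds the EMR cluster's *-site.xml files
export YARN_CONF_DIR=$HADOOP_CONF_DIR           # spark-submit reads either variable to locate the YARN configs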
Command (the "cmd" field of the Marathon app definition):
"cmd": "/opt/spark-2.2.1-bin-hadoop2.7/bin/spark-submit --verbose \\\n --name emr_external_mpv_streaming \\\n --master yarn \\\n --deploy-mode client \\\n --conf spark.yarn.jars='hdfs://ip-10-150-4-xxx.ec2.internal:8020/user/root/edw-stream-external-mpv_2.11-2-SNAPSHOT.jar' \\\n --conf spark.executor.instances=4 \\\n --conf spark.executor.cores=1 \\\n --conf spark.executor.memory=1g \\\n --conf spark.driver.memory=1g \\\n --conf spark.cores.max=4 \\\n --conf spark.executorEnv.EXT_WH_HOST=$EXT_WH_HOST \\\n --conf spark.executorEnv.EXT_WH_PASSWORD=$EXT_WH_PASSWORD \\\n --conf spark.executorEnv.KAFKA_BROKER_LIST=$_KAFKA_BROKER_LIST \\\n --conf spark.executorEnv.SCHEMA_REGISTRY_URL=$SCHEMA_REGISTRY_URL \\\n --conf spark.executorEnv.AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \\\n --conf spark.executorEnv.AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \\\n --conf spark.executorEnv.STAGING_S3_BUCKET=$STAGING_S3_BUCKET \\\n --conf spark.executorEnv.KAFKA_GROUP_ID=$KAFKA_GROUP_ID \\\n --conf spark.executorEnv.MAX_RATE=$MAX_RATE \\\n --conf spark.executorEnv.KAFKA_MAX_POLL_MS=$KAFKA_MAX_POLL_MS \\\n --conf spark.executorEnv.KAFKA_MAX_POLL_RECORDS=$KAFKA_MAX_POLL_RECORDS \\\n --class com.ticketnetwork.edwstream.external.MapPageView \\\n /root/edw-stream-external-mpv_2.11-2-SNAPSHOT.jar",
I added a log4j.properties file and set this line to INFO: log4j.logger.org.apache.spark.repl.Main=INFO
root@7fd388c6d69b:/opt/spark-2.2.1-bin-hadoop2.7/conf# cat log4j.properties
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Set everything to be logged to the console
log4j.rootCategory=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
# Set the default spark-shell log level to WARN. When running the spark-shell, the
# log level for this class is used to overwrite the root logger's log level, so that
# the user can have different defaults for the shell and regular Spark apps.
log4j.logger.org.apache.spark.repl.Main=INFO
# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
log4j.logger.org.apache.parquet=ERROR
log4j.logger.parquet=ERROR
# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
Updated logs: We added the core-site.xml, yarn-site.xml, and hdfs-site.xml files to the Spark and Hadoop home directories, which fixed the file-not-found error. Now it shows a new error:
I0912 14:16:53.273274 809 fetcher.cpp:564] Fetcher Info: {"cache_directory":"\/tmp\/mesos\/fetch\/root","items":[{"action":"BYPASS_CACHE","uri":{"cache":false,"executable":false,"extract":false,"value":"http:\/\/nexus.ticketnetwork.com\/repository\/maven-snapshots\/com\/ticketnetwork\/edw\/edw-stream-tr-emr_2.11\/2-SNAPSHOT\/edw-stream-external-mpv_2.11-2-SNAPSHOT.jar"}}],"sandbox_directory":"\/var\/lib\/mesos\/slaves\/ea5da916-2d9b-445c-af39-35ef55004f9c-S4\/frameworks\/68397677-bc51-4853-aaf7-8245792ee3e5-0000\/executors\/edw_external_emr-test-new.7f4bbed3-b696-11e8-aeb4-02428627103d\/runs\/ad7ae2dd-75d3-473e-bcba-e9d7f7ce0d99","stall_timeout":{"nanoseconds":60000000000},"user":"root"}
I0912 14:16:53.276952 809 fetcher.cpp:461] Fetching URI 'http://nexus.ticketnetwork.com/repository/maven-snapshots/com/ticketnetwork/edw/edw-stream-tr-emr_2.11/2-SNAPSHOT/edw-stream-external-mpv_2.11-2-SNAPSHOT.jar'
I0912 14:16:53.276970 809 fetcher.cpp:296] Fetching directly into the sandbox directory
I0912 14:16:53.276990 809 fetcher.cpp:229] Fetching URI 'http://nexus.ticketnetwork.com/repository/maven-snapshots/com/ticketnetwork/edw/edw-stream-tr-emr_2.11/2-SNAPSHOT/edw-stream-external-mpv_2.11-2-SNAPSHOT.jar'
I0912 14:16:53.277006 809 fetcher.cpp:179] Downloading resource from 'http://nexus.ticketnetwork.com/repository/maven-snapshots/com/ticketnetwork/edw/edw-stream-tr-emr_2.11/2-SNAPSHOT/edw-stream-external-mpv_2.11-2-SNAPSHOT.jar' to '/var/lib/mesos/slaves/ea5da916-2d9b-445c-af39-35ef55004f9c-S4/frameworks/68397677-bc51-4853-aaf7-8245792ee3e5-0000/executors/edw_external_emr-test-new.7f4bbed3-b696-11e8-aeb4-02428627103d/runs/ad7ae2dd-75d3-473e-bcba-e9d7f7ce0d99/edw-stream-external-mpv_2.11-2-SNAPSHOT.jar'
I0912 14:16:59.030855 809 fetcher.cpp:620] Fetched 'http://nexus.ticketnetwork.com/repository/maven-snapshots/com/ticketnetwork/edw/edw-stream-tr-emr_2.11/2-SNAPSHOT/edw-stream-external-mpv_2.11-2-SNAPSHOT.jar' to '/var/lib/mesos/slaves/ea5da916-2d9b-445c-af39-35ef55004f9c-S4/frameworks/68397677-bc51-4853-aaf7-8245792ee3e5-0000/executors/edw_external_emr-test-new.7f4bbed3-b696-11e8-aeb4-02428627103d/runs/ad7ae2dd-75d3-473e-bcba-e9d7f7ce0d99/edw-stream-external-mpv_2.11-2-SNAPSHOT.jar'
I0912 14:16:59.030885 809 fetcher.cpp:625] Successfully fetched all URIs into '/var/lib/mesos/slaves/ea5da916-2d9b-445c-af39-35ef55004f9c-S4/frameworks/68397677-bc51-4853-aaf7-8245792ee3e5-0000/executors/edw_external_emr-test-new.7f4bbed3-b696-11e8-aeb4-02428627103d/runs/ad7ae2dd-75d3-473e-bcba-e9d7f7ce0d99'
I0912 14:16:59.501281 832 exec.cpp:162] Version: 1.6.0
I0912 14:16:59.505044 835 exec.cpp:236] Executor registered on agent ea5da916-2d9b-445c-af39-35ef55004f9c-S4
I0912 14:16:59.505780 834 executor.cpp:123] Registered docker executor on ip-10-150-4-4.ec2.internal
I0912 14:16:59.505988 835 executor.cpp:179] Starting task edw_external_emr-test-new.7f4bbed3-b696-11e8-aeb4-02428627103d
Using properties file: null
Parsed arguments:
master yarn
deployMode client
executorMemory 1g
executorCores 1
totalExecutorCores 4
propertiesFile null
driverMemory 1g
driverCores null
driverExtraClassPath null
driverExtraLibraryPath null
driverExtraJavaOptions null
supervise false
queue null
numExecutors 4
files null
pyFiles null
archives null
mainClass com.ticketnetwork.edwstream.external.MapPageView
primaryResource file:/root/edw-stream-external-mpv_2.11-2-SNAPSHOT.jar
name emr_external_mpv_streaming
childArgs []
jars null
packages null
packagesExclusions null
repositories null
verbose true
Spark properties used, including those specified through
--conf and those from the properties file null:
(spark.executorEnv.MAX_RATE,1000)
(spark.executor.memory,1g)
(spark.driver.memory,1g)
(spark.yarn.dist.jars,*********(redacted))
(spark.executor.instances,4)
(spark.yarn.jars,*********(redacted))
(spark.cores.max,4)
(spark.executorEnv.KAFKA_GROUP_ID,edw_mpv_emr_prod_2)
(spark.executorEnv.KAFKA_MAX_POLL_MS,100000)
(spark.executorEnv.AWS_ACCESS_KEY_ID,'AKIAILHOVXT5AKDVWRTA')
(spark.executorEnv.KAFKA_MAX_POLL_RECORDS,1000)
(spark.executorEnv.SCHEMA_REGISTRY_URL,*********(redacted))
(spark.executorEnv.KAFKA_BROKER_LIST,)
(spark.executorEnv.EXT_WH_PASSWORD,*********(redacted))
(spark.executorEnv.AWS_SECRET_ACCESS_KEY,*********(redacted))
(spark.executorEnv.EXT_WH_HOST,10.150.3.46)
(spark.executorEnv.STAGING_S3_BUCKET,'s3a://external-streaming/mpv-emr-prod/')
(spark.executor.cores,1)
Main class:
com.ticketnetwork.edwstream.external.MapPageView
Arguments:
System properties:
(spark.executorEnv.MAX_RATE,1000)
(spark.driver.memory,1g)
(spark.executor.memory,1g)
(spark.yarn.dist.jars,*********(redacted))
(spark.executor.instances,4)
(spark.yarn.jars,*********(redacted))
(spark.executorEnv.KAFKA_GROUP_ID,edw_mpv_emr_prod_2)
(spark.cores.max,4)
(spark.executorEnv.KAFKA_MAX_POLL_MS,100000)
(SPARK_SUBMIT,true)
(spark.executorEnv.AWS_ACCESS_KEY_ID,'AKIAILHOVXT5AKDVWRTA')
(spark.executorEnv.KAFKA_MAX_POLL_RECORDS,1000)
(spark.executorEnv.KAFKA_BROKER_LIST,)
(spark.executorEnv.SCHEMA_REGISTRY_URL,*********(redacted))
(spark.app.name,emr_external_mpv_streaming)
(spark.jars,file:/root/edw-stream-external-mpv_2.11-2-SNAPSHOT.jar)
(spark.submit.deployMode,client)
(spark.executorEnv.AWS_SECRET_ACCESS_KEY,*********(redacted))
(spark.executorEnv.EXT_WH_PASSWORD,*********(redacted))
(spark.master,yarn)
(spark.executorEnv.STAGING_S3_BUCKET,'s3a://external-streaming/mpv-emr-prod/')
(spark.executorEnv.EXT_WH_HOST,10.150.3.46)
(spark.executor.cores,1)
Classpath elements:
file:/root/edw-stream-external-mpv_2.11-2-SNAPSHOT.jar
18/09/12 14:17:04 INFO SparkContext: Running Spark version 2.2.1
18/09/12 14:17:05 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
18/09/12 14:17:05 INFO SparkContext: Submitted application: edw-stream-ext-mpv-emr-prod
18/09/12 14:17:05 INFO SecurityManager: Changing view acls to: root
18/09/12 14:17:05 INFO SecurityManager: Changing modify acls to: root
18/09/12 14:17:05 INFO SecurityManager: Changing view acls groups to:
18/09/12 14:17:05 INFO SecurityManager: Changing modify acls groups to:
18/09/12 14:17:05 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(root); groups with view permissions: Set(); users with modify permissions: Set(root); groups with modify permissions: Set()
18/09/12 14:17:05 INFO Utils: Successfully started service 'sparkDriver' on port 36286.
18/09/12 14:17:05 INFO SparkEnv: Registering MapOutputTracker
18/09/12 14:17:05 INFO SparkEnv: Registering BlockManagerMaster
18/09/12 14:17:05 INFO BlockManagerMasterEndpoint: Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information
18/09/12 14:17:05 INFO BlockManagerMasterEndpoint: BlockManagerMasterEndpoint up
18/09/12 14:17:05 INFO DiskBlockManager: Created local directory at /tmp/blockmgr-34685f43-55ab-43d8-89af-ded72c7709b7
18/09/12 14:17:05 INFO MemoryStore: MemoryStore started with capacity 366.3 MB
18/09/12 14:17:05 INFO SparkEnv: Registering OutputCommitCoordinator
18/09/12 14:17:06 INFO Utils: Successfully started service 'SparkUI' on port 4040.
18/09/12 14:17:06 INFO SparkUI: Bound SparkUI to 0.0.0.0, and started at http://10.150.4.4:4040
18/09/12 14:17:06 INFO SparkContext: Added JAR file:/root/edw-stream-external-mpv_2.11-2-SNAPSHOT.jar at spark://10.150.4.4:36286/jars/edw-stream-external-mpv_2.11-2-SNAPSHOT.jar with timestamp 1536761826186
18/09/12 14:17:08 INFO TimelineClientImpl: Timeline service address: http://ip-10-150-4-211.ec2.internal:8188/ws/v1/timeline/
18/09/12 14:17:08 INFO RMProxy: Connecting to ResourceManager at ip-10-150-4-211.ec2.internal/10.150.4.211:8032
18/09/12 14:17:08 INFO Client: Requesting a new application from cluster with 8 NodeManagers
18/09/12 14:17:08 INFO Client: Verifying our application has not requested more than the maximum memory capability of the cluster (12288 MB per container)
18/09/12 14:17:08 INFO Client: Will allocate AM container, with 896 MB memory including 384 MB overhead
18/09/12 14:17:08 INFO Client: Setting up container launch context for our AM
18/09/12 14:17:08 INFO Client: Setting up the launch environment for our AM container
18/09/12 14:17:08 INFO Client: Preparing resources for our AM container
18/09/12 14:17:08 INFO Client: Source and destination file systems are the same. Not copying hdfs://ip-10-150-4-211.ec2.internal:8020/user/root/edw-stream-external-mpv_2.11-2-SNAPSHOT.jar
18/09/12 14:17:08 WARN Client: Same path resource hdfs://ip-10-150-4-211.ec2.internal:8020/user/root/edw-stream-external-mpv_2.11-2-SNAPSHOT.jar added multiple times to distributed cache.
18/09/12 14:17:08 INFO Client: Uploading resource file:/tmp/spark-93c7dfb3-88af-4dad-a2f9-77472970f842/__spark_conf__911960077635220194.zip -> hdfs://ip-10-150-4-211.ec2.internal:8020/user/root/.sparkStaging/application_1533161297339_0357/__spark_conf__.zip
18/09/12 14:17:08 INFO SecurityManager: Changing view acls to: root
18/09/12 14:17:08 INFO SecurityManager: Changing modify acls to: root
18/09/12 14:17:08 INFO SecurityManager: Changing view acls groups to:
18/09/12 14:17:08 INFO SecurityManager: Changing modify acls groups to:
18/09/12 14:17:08 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(root); groups with view permissions: Set(); users with modify permissions: Set(root); groups with modify permissions: Set()
18/09/12 14:17:09 INFO Client: Submitting application application_1533161297339_0357 to ResourceManager
18/09/12 14:17:09 INFO YarnClientImpl: Submitted application application_1533161297339_0357
18/09/12 14:17:09 INFO SchedulerExtensionServices: Starting Yarn extension services with app application_1533161297339_0357 and attemptId None
18/09/12 14:17:10 INFO Client: Application report for application_1533161297339_0357 (state: ACCEPTED)
18/09/12 14:17:10 INFO Client:
client token: N/A
diagnostics: N/A
ApplicationMaster host: N/A
ApplicationMaster RPC port: -1
queue: default
start time: 1536761829019
final status: UNDEFINED
tracking URL: http://ip-10-150-4-211.ec2.internal:20888/proxy/application_1533161297339_0357/
user: root
18/09/12 14:17:11 INFO Client: Application report for application_1533161297339_0357 (state: FAILED)
18/09/12 14:17:11 INFO Client:
client token: N/A
diagnostics: Application application_1533161297339_0357 failed 2 times due to AM Container for appattempt_1533161297339_0357_000002 exited with exitCode: 1
For more detailed output, check application tracking page:http://ip-10-150-4-211.ec2.internal:8088/cluster/app/application_1533161297339_0357Then, click on links to logs of each attempt.
Diagnostics: Exception from container-launch.
Container id: container_1533161297339_0357_02_000001
Exit code: 1
Stack trace: ExitCodeException exitCode=1:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:582)
at org.apache.hadoop.util.Shell.run(Shell.java:479)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:773)
at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:212)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Container exited with a non-zero exit code 1
Failing this attempt. Failing the application.
ApplicationMaster host: N/A
ApplicationMaster RPC port: -1
queue: default
start time: 1536761829019
final status: FAILED
tracking URL: http://ip-10-150-4-211.ec2.internal:8088/cluster/app/application_1533161297339_0357
user: root
18/09/12 14:17:11 INFO Client: Deleted staging directory hdfs://ip-10-150-4-211.ec2.internal:8020/user/root/.sparkStaging/application_1533161297339_0357
18/09/12 14:17:11 ERROR SparkContext: Error initializing SparkContext.
org.apache.spark.SparkException: Yarn application has already ended! It might have been killed or unable to launch application master.
at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.waitForApplication(YarnClientSchedulerBackend.scala:85)
at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:62)
at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:173)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:509)
at org.apache.spark.streaming.StreamingContext$.createNewSparkContext(StreamingContext.scala:839)
at org.apache.spark.streaming.StreamingContext.<init>(StreamingContext.scala:85)
at com.ticketnetwork.edwstream.external.MapPageView$.main(MapPageView.scala:118)
at com.ticketnetwork.edwstream.external.MapPageView.main(MapPageView.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:775)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:119)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
18/09/12 14:17:11 INFO SparkUI: Stopped Spark web UI at http://10.150.4.4:4040
18/09/12 14:17:11 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Attempted to request executors before the AM has registered!
18/09/12 14:17:11 INFO YarnClientSchedulerBackend: Shutting down all executors
18/09/12 14:17:11 INFO YarnSchedulerBackend$YarnDriverEndpoint: Asking each executor to shut down
18/09/12 14:17:11 INFO SchedulerExtensionServices: Stopping SchedulerExtensionServices
(serviceOption=None,
services=List(),
started=false)
18/09/12 14:17:11 INFO YarnClientSchedulerBackend: Stopped
18/09/12 14:17:11 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
18/09/12 14:17:11 INFO MemoryStore: MemoryStore cleared
18/09/12 14:17:11 INFO BlockManager: BlockManager stopped
18/09/12 14:17:11 INFO BlockManagerMaster: BlockManagerMaster stopped
18/09/12 14:17:11 WARN MetricsSystem: Stopping a MetricsSystem that is not running
18/09/12 14:17:11 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
18/09/12 14:17:11 INFO SparkContext: Successfully stopped SparkContext
Exception in thread "main" org.apache.spark.SparkException: Yarn application has already ended! It might have been killed or unable to launch application master.
at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.waitForApplication(YarnClientSchedulerBackend.scala:85)
at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:62)
at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:173)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:509)
at org.apache.spark.streaming.StreamingContext$.createNewSparkContext(StreamingContext.scala:839)
at org.apache.spark.streaming.StreamingContext.<init>(StreamingContext.scala:85)
at com.ticketnetwork.edwstream.external.MapPageView$.main(MapPageView.scala:118)
at com.ticketnetwork.edwstream.external.MapPageView.main(MapPageView.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:775)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:119)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
18/09/12 14:17:11 INFO ShutdownHookManager: Shutdown hook called
18/09/12 14:17:11 INFO ShutdownHookManager: Deleting directory /tmp/spark-93c7dfb3-88af-4dad-a2f9-77472970f842
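The client-side log above only reports exit code 1; the actual failure reason is in the AM container's logs on the cluster. The standard way to pull them (run from a host whose HADOOP_CONF_DIR points at the EMR cluster) is:

yarn logs -applicationId application_1533161297339_0357
# the same logs are also reachable through the tracking URL from the report:
# http://ip-10-150-4-211.ec2.internal:8088/cluster/app/application_1533161297339_0357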
core-site.xml: I have added the core-site.xml file to both the Spark and Hadoop home directories.
root@ec22ba1fc196:/opt/spark-2.2.1-bin-hadoop2.7/conf# cat core-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Licensed to the Apache Software Foundation (ASF) under one or more -->
<!-- contributor license agreements. See the NOTICE file distributed with -->
<!-- this work for additional information regarding copyright ownership. -->
<!-- The ASF licenses this file to You under the Apache License, Version 2.0 -->
<!-- (the "License"); you may not use this file except in compliance with -->
<!-- the License. You may obtain a copy of the License at -->
<!-- -->
<!-- http://www.apache.org/licenses/LICENSE-2.0 -->
<!-- -->
<!-- Unless required by applicable law or agreed to in writing, software -->
<!-- distributed under the License is distributed on an "AS IS" BASIS, -->
<!-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -->
<!-- See the License for the specific language governing permissions and -->
<!-- limitations under the License. -->
<configuration>
<property>
<!-- URI of NN. Fully qualified. No IP.-->
<name>fs.defaultFS</name>
<value>hdfs://ip-10-150-4-211.ec2.internal:8020</value>
</property>
<property>
<name>hadoop.security.authentication</name>
<value>simple</value>
</property>
<property>
<name>hadoop.proxyuser.hive.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hive.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.httpfs.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.httpfs.groups</name>
<value>hudson,testuser,root,hadoop,jenkins,oozie,hive,httpfs,hue,users</value>
</property>
<property>
<name>hadoop.proxyuser.hue.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hue.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.oozie.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.oozie.groups</name>
<value>*</value>
</property>
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
<property>
<name>fs.s3.buffer.dir</name>
<value>/mnt/s3</value>
<final>true</final>
</property>
<property>
<name>fs.s3.impl</name>
<value>com.amazon.ws.emr.hadoop.fs.EmrFileSystem</value>
</property>
<property>
<name>fs.s3n.impl</name>
<value>com.amazon.ws.emr.hadoop.fs.EmrFileSystem</value>
</property>
<property>
<name>ipc.client.connect.max.retries.on.timeouts</name>
<value>5</value>
</property>
<property>
<name>hadoop.security.key.default.bitlength</name>
<value>256</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/mnt/var/lib/hadoop/tmp</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.security.key.provider.path</name>
<value>kms://http@ip-10-150-4-211.ec2.internal:9700/kms</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>65536</value>
</property>
<property>
<name>fs.AbstractFileSystem.s3.impl</name>
<value>org.apache.hadoop.fs.s3.EMRFSDelegate</value>
</property>
<property>
<name>fs.s3bfs.impl</name>
<value>org.apache.hadoop.fs.s3.S3FileSystem</value>
</property>
</configuration>
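A sketch of how I place and sanity-check these configs inside the container (the copy destinations reflect my assumed image layout, not fixed requirements):

# copy the EMR cluster's client configs into the Spark and Hadoop conf dirs
cp core-site.xml yarn-site.xml hdfs-site.xml /opt/spark-2.2.1-bin-hadoop2.7/conf/
cp core-site.xml yarn-site.xml hdfs-site.xml $HADOOP_CONF_DIR/

# quick connectivity checks from inside the container
hdfs dfs -ls hdfs://ip-10-150-4-211.ec2.internal:8020/user/root/   # can we reach the NameNode?
yarn node -list                                                    # can we reach the ResourceManager?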