java.util.concurrent.TimeoutException: Futures timed out after [100000 milliseconds]
at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:219)
at scala.concurrent.impl.Promise$DefaultPromise.result(Promise.scala:223)
at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:201)
at org.apache.spark.deploy.yarn.ApplicationMaster.runDriver(ApplicationMaster.scala:498)
at org.apache.spark.deploy.yarn.ApplicationMaster.org$apache$spark$deploy$yarn$ApplicationMaster$$runImpl(ApplicationMaster.scala:345)
at org.apache.spark.deploy.yarn.ApplicationMaster$$anonfun$run$2.apply$mcV$sp(ApplicationMaster.scala:260)
at org.apache.spark.deploy.yarn.ApplicationMaster$$anonfun$run$2.apply(ApplicationMaster.scala:260)
at org.apache.spark.deploy.yarn.ApplicationMaster$$anonfun$run$2.apply(ApplicationMaster.scala:260)
at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$5.run(ApplicationMaster.scala:815)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1844)
at org.apache.spark.deploy.yarn.ApplicationMaster.doAsUser(ApplicationMaster.scala:814)
at org.apache.spark.deploy.yarn.ApplicationMaster.run(ApplicationMaster.scala:259)
at org.apache.spark.deploy.yarn.ApplicationMaster$.main(ApplicationMaster.scala:839)
at org.apache.spark.deploy.yarn.ApplicationMaster.main(ApplicationMaster.scala)
I did a lot of searching, and it looks like the thread could not finish its task and timed out. I read through many links looking for a solution, and many of them suggested increasing spark.sql.broadcastTimeout. So I set this variable to 6000000 in the Spark conf when submitting the Spark job. Below is the command I use to submit the Spark job to the EMR cluster.
spark-submit --deploy-mode cluster --class spark.Main --master yarn --conf spark.sql.broadcastTimeout=6000000 --files /home/hadoop/myproject/conf/abc.properties /home/hadoop/myproject/job-artifacts/project_jar_with_dependencies.jar
However, I am still getting the same exception. If anyone has any ideas, please help.
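For reference, I understand the same property can also be set programmatically when building the SparkSession instead of (or in addition to) passing it via --conf. A minimal sketch of what that would look like in the driver class (the app name and the placeholder job logic are assumptions, not my actual code):

import org.apache.spark.sql.SparkSession

object Main {
  def main(args: Array[String]): Unit = {
    // Set the broadcast timeout (value is in seconds) directly on the session,
    // mirroring the --conf spark.sql.broadcastTimeout=6000000 passed to spark-submit.
    val spark = SparkSession
      .builder()
      .appName("my-emr-job") // placeholder name
      .config("spark.sql.broadcastTimeout", "6000000")
      .getOrCreate()

    // ... actual job logic goes here ...

    spark.stop()
  }
}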