Context
I run Spark applications on an Amazon EMR cluster. These applications are orchestrated by YARN.
In the AWS Console, I can see the YARN application status on the Application History tab of the cluster details page (see View Application History).
Goal / Question
I would like to obtain the same information (the application status) from a Java or Scala program. So, is it possible to get the YARN application status through the AWS EMR Java SDK? In my application, I already manage some EMR object instances.
Thanks in advance.
Answer 0 (score: 0)
I came across this question because I was looking for a way to get job status through the EMR "step" API.
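For that route, here is a minimal sketch, assuming the AWS SDK for Java (v1) EMR client; the cluster ID and step ID are hypothetical placeholders:

import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClientBuilder
import com.amazonaws.services.elasticmapreduce.model.DescribeStepRequest

object StepStatusCheck {
  def main(args: Array[String]): Unit = {
    // Build a client from the default credential/region provider chain.
    val emr = AmazonElasticMapReduceClientBuilder.defaultClient()

    // Hypothetical IDs; substitute your own cluster and step.
    val request = new DescribeStepRequest()
      .withClusterId("j-XXXXXXXXXXXXX")
      .withStepId("s-XXXXXXXXXXXXX")

    // The step state is a string such as PENDING, RUNNING, COMPLETED, FAILED.
    val state = emr.describeStep(request).getStep.getStatus.getState
    println(s"Step state: $state")
  }
}

But if you want to get the status directly from YARN, here is some sample code: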
import com.typesafe.config.ConfigFactory
import org.apache.spark.launcher.SparkLauncher
import org.json4s._
import org.json4s.native.JsonMethods._ // json4s flavor assumed; the jackson backend works the same way
import org.slf4j.Logger                // logger type assumed; the original omits its imports
import scalaj.http.Http

object DataLoad {

  // Returns the given top-level field of a json4s JValue as a string, or None if absent.
  private def getJsonField(json: JValue, key: String): Option[String] = {
    json \ key match {
      case JNothing => None // a missing key yields JNothing, which is itself a JValue
      case jval     => Some(jval.values.toString)
    }
  }

  def load(logger: Logger, hiveDatabase: String, hiveTable: String, dw_table_name: String): Unit = {
    val conf = ConfigFactory.load
    val yarnResourceManager = conf.getString("app.yarnResourceManager")
    val sparkExecutors = conf.getString("app.sparkExecutors")
    val sparkHome = conf.getString("app.sparkHome")
    val sparkAppJar = conf.getString("app.sparkAppJar")
    val sparkMainClass = conf.getString("app.sparkMainClass")
    val sparkMaster = conf.getString("app.sparkMaster")
    val sparkDriverMemory = conf.getString("app.sparkDriverMemory")
    val sparkExecutorMemory = conf.getString("app.sparkExecutorMemory")

    // Write to S3 when the target name is an s3a:// path, otherwise to SQL.
    val destination = if (dw_table_name.contains("s3a://")) "s3" else "sql"

    // Launch the Spark job; startApplication() returns a SparkAppHandle we can poll.
    val spark = new SparkLauncher()
      .setSparkHome(sparkHome)
      .setAppResource(sparkAppJar)
      .setMainClass(sparkMainClass)
      .setMaster(sparkMaster)
      .addAppArgs(hiveDatabase)
      .addAppArgs(hiveTable)
      .addAppArgs(destination)
      .setVerbose(false)
      .setConf("spark.driver.memory", sparkDriverMemory)
      .setConf("spark.executor.memory", sparkExecutorMemory)
      .setConf("spark.executor.cores", sparkExecutors)
      .setConf("spark.executor.instances", sparkExecutors)
      .setConf("spark.driver.maxResultSize", "5g")
      .setConf("spark.sql.broadcastTimeout", "144000")
      .setConf("spark.network.timeout", "144000")
      .startApplication()

    // Poll every 10 seconds until the application reaches a final state,
    // giving up after roughly 8 hours (3000 iterations * 10 s).
    var unknownCounter = 0
    while (!spark.getState.isFinal) {
      println(spark.getState.toString)
      Thread.sleep(10000)
      if (unknownCounter > 3000) {
        throw new IllegalStateException("Spark Job Failed: timeout expired after 8 hours")
      }
      unknownCounter += 1
    }
    println(spark.getState.toString)

    val appId: String = spark.getAppId
    println(s"appId: $appId")

    // Ask the YARN ResourceManager REST API for the application's final status,
    // retrying up to 5 times on non-2xx responses.
    var finalState = ""
    var i = 0
    while (i < 5) {
      val response = Http(s"http://$yarnResourceManager/ws/v1/cluster/apps/$appId/").asString
      if (response.code.toString.startsWith("2")) {
        val json = parse(response.body)
        finalState = getJsonField(json \ "app", "finalStatus").getOrElse("")
        i = 55 // success: jump past the retry bound to exit the loop
      } else {
        i = i + 1
      }
    }

    if (finalState.equalsIgnoreCase("SUCCEEDED")) {
      println("SPARK JOB SUCCEEDED")
    } else {
      throw new IllegalStateException("Spark Job Failed")
    }
  }
}
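If you already have the YARN application ID, the ResourceManager REST call used above also works on its own, without going through SparkLauncher. A minimal sketch, assuming scalaj-http and json4s as in the code above; the host and application ID are hypothetical placeholders:

import org.json4s._
import org.json4s.native.JsonMethods._
import scalaj.http.Http

object YarnAppStatus {
  def main(args: Array[String]): Unit = {
    // Hypothetical values; use your ResourceManager host:port and application ID.
    val resourceManager = "ip-10-0-0-1.ec2.internal:8088"
    val appId = "application_1570000000000_0001"

    val response = Http(s"http://$resourceManager/ws/v1/cluster/apps/$appId").asString
    if (response.code == 200) {
      val app = parse(response.body) \ "app"
      // "state" is the YARN lifecycle state (e.g. RUNNING, FINISHED);
      // "finalStatus" is the application's outcome (e.g. SUCCEEDED, FAILED).
      println((app \ "state").values)
      println((app \ "finalStatus").values)
    } else {
      println(s"ResourceManager returned HTTP ${response.code}")
    }
  }
}

On EMR, the ResourceManager typically serves this API on the master node at port 8088, so the app.yarnResourceManager setting above would usually be the master node's host:8088.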