My SparkAppHandle code worked fine on Spark 1.6, but after upgrading to Spark 2.3 the Spark job is never launched.
handle.getState() reports a failed state and handle.getAppId() is null.
Do I need to add something to pom.xml, or are there extra properties required for Spark 2.3?
Here is the code that runs fine on Spark 1.6:
SparkAppHandle handle = new SparkLauncher().setSparkHome(sparkProp.getProperty("spark.home"))
.setAppResource(sparkProp.getProperty("spark.resource_jar"))
.setMainClass("com.mycom.bdec.spark.validator.SparkDBToDBComparison")
.setConf("spark.serializer", sparkProp.getProperty("spark.serializer"))
.setConf(SparkLauncher.DRIVER_MEMORY, "16g") // was: sparkProp.getProperty("spark.driver_memory")
.setConf(SparkLauncher.EXECUTOR_MEMORY, sparkProp.getProperty("spark.executor_memory"))
.setConf(SparkLauncher.EXECUTOR_CORES, sparkProp.getProperty("spark.executor_cores"))
.setConf("spark.submit.deployMode", sparkProp.getProperty("spark.deploy_mode"))
.setConf(SparkLauncher.SPARK_MASTER, sparkProp.getProperty("spark.master"))
.setConf("spark.shuffle.service.enabled", "true")
.setConf("spark.dynamicAllocation.enabled", "false")
.setConf("spark.executor.instances", "300")
.setConf("spark.sql.shuffle.partitions", "2001")
.setConf("spark.default.parallelism", "2001")
.setConf("spark.yarn.executor.memoryOverhead", "7168")
.setConf("spark.task.cpus", "2")
.setConf("spark.scheduler.mode", "FAIR")
.setConf("spark.yarn.queue", queueName)
.setConf("spark.yarn.jar", "maprfs://"+sparkProp.getProperty("spark.cloak.jar"))
.setConf("spark.sql.parquet.binaryAsString","true")
.setConf(SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS, "-XX:MaxDirectMemorySize=1024m -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=30 -XX:+ScavengeBeforeFullGC -XX:+CMSScavengeBeforeRemark")
.setConf(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS,"-XX:MaxDirectMemorySize=1024m -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=30 -XX:+ScavengeBeforeFullGC -XX:+CMSScavengeBeforeRemark")
.setConf("spark.network.timeout", "5000s")
.setConf("spark.akka.frameSize", "1024")
.setConf("spark.rpc.numRetries", "5")
.setConf("spark.speculation", "true")
.setConf("spark.locality.wait", "1s")
.setConf("spark.akka.threads", "5")
.setConf("spark.sql.parquet.filterPushdown", "true")
.addJar(sparkProp.getProperty("spark.cloak.jar"))
.addJar(sparkProp.getProperty("spark.csv.jar"))
.addJar(sparkProp.getProperty("spark.commons.csv.jar"))
.addJar(sparkProp.getProperty("spark.nz.jdbc.jar"))
.addJar(sparkProp.getProperty("spark.nz.connector.jar"))
.addJar(sparkProp.getProperty("spark.gmdaisvcrypt.jar"))
.setConf("spark.db1Type", metaDataMap.get("db1Type"))
.setConf("spark.db2Type", metaDataMap.get("db2Type"))
.setConf("spark.sqoopHiveTable1", sqoopHiveTable1)
.setConf("spark.sqoopHiveTable2", sqoopHiveTable2)
.setConf("spark.jdbcDbUrl1", metaDataMap.get("jdbcDbUrl1"))
.setConf("spark.jdbcDriver1", metaDataMap.get("jdbcDriver1"))
.setConf("spark.jdbcUsername1", metaDataMap.get("jdbcUsername1"))
.setConf("spark.jdbcPassword1", metaDataMap.get("jdbcPassword1"))
.setConf("spark.hiveDatabaseName1", metaDataMap.get("dbName"))
.setConf("spark.jdbcDbUrl2", metaDataMap.get("jdbcDbUrl2"))
.setConf("spark.jdbcDriver2", metaDataMap.get("jdbcDriver2"))
.setConf("spark.jdbcUsername2", metaDataMap.get("jdbcUsername2"))
.setConf("spark.jdbcPassword2", metaDataMap.get("jdbcPassword2"))
.setConf("spark.hiveDatabaseName2", metaDataMap.get("dbName_2"))
.setConf("spark.query1", metaDataMap.get("query_1"))
.setConf("spark.query2", metaDataMap.get("query_2"))
.setConf("spark.recordCount", metaDataMap.get("recordCount"))
.setConf("spark.distinctValue", metaDataMap.get("distinctValues"))
.setConf("spark.colNames", metaDataMap.get("tableKey"))
.setConf("spark.colNames2", metaDataMap.get("tableKey_2"))
.setConf("spark.dataTypeCheck", metaDataMap.get("dataTypeCheck"))
.setConf("spark.dataTypeColNames", dataTypeColNames)
.setConf("spark.dataLengthCheck", metaDataMap.get("dataLengthCheck"))
.setConf("spark.dataLengthColNames", metaDataMap.get("dataLengthColNames"))
.setConf("spark.multiColumnKeyValueCheck", metaDataMap.get("multiColumnKeyValueCheck"))
.setConf("spark.keyCols", metaDataMap.get("keyCols"))
.setConf("spark.userValuesCheck", metaDataMap.get("userValuesCheck"))
.setConf("spark.valueCols", metaDataMap.get("valueCols"))
.setConf("spark.testExecutionId", metaDataMap.get("testExecutionId"))
.setConf("spark.ouputDataPath", ouputDataPath)
.setConf("spark.resultStatDataPath", resultStatDataPath)
.setConf("spark.matchDataPath", matchDataPath)
.setConf("spark.writeDelimeter", writeDelimeter)
.setConf("spark.voltageConfigFile", metaDataMap.get("voltageConfigFile"))
.setConf("spark.pIColNm_1", metaDataMap.get("pIColNm_1"))
.setConf("spark.pIColNm_2", metaDataMap.get("pIColNm_2"))
.setConf("spark.yarn.log4jpath", ExecutionUtil.getLogFilePath())
.setConf("spark.yarn.logFilePath", ExecutionUtil.makeLog4jProperties(logDir))
.setAppName("SparkDbToDbComparison")
.startApplication();
CountDownLatch countDownLatch = new CountDownLatch(1);
handle.addListener(new SparkAppHandle.Listener() {
    boolean sparkJobIdSaved = false;

    @Override
    public void stateChanged(SparkAppHandle handle) {
        if (!sparkJobIdSaved && handle.getAppId() != null) {
            logger.info("Getting Spark App Id - State " + handle.getState());
            logger.info("Getting Spark App Id - getAppId " + handle.getAppId());
            // Store the Spark job id in the DB
            ExecutionUtil.updateExecution(exec, handle.getAppId());
            sparkJobIdSaved = true;
        }
        if (handle.getState().isFinal()) {
            countDownLatch.countDown();
        }
    }

    @Override
    public void infoChanged(SparkAppHandle handle) {
    }
});
countDownLatch.await();
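
To figure out why spark-submit is not being invoked on 2.3, I am also trying to surface the launcher output. Below is a minimal debugging sketch, stripped of my real configuration (the Spark home, app jar, master, deploy mode and log paths are placeholders): setVerbose(true) passes --verbose to spark-submit, and redirectOutput/redirectError write the child process output to files so a launch failure is visible instead of just ending in a failed state with a null app id.

import java.io.File;

import org.apache.spark.launcher.SparkAppHandle;
import org.apache.spark.launcher.SparkLauncher;

public class LauncherDebug {
    public static void main(String[] args) throws Exception {
        // Minimal launch with the spark-submit output captured to files.
        SparkAppHandle handle = new SparkLauncher()
                .setSparkHome("/opt/spark-2.3.0")                          // placeholder: Spark 2.3 installation
                .setAppResource("/path/to/spark-validator.jar")            // placeholder: application jar
                .setMainClass("com.mycom.bdec.spark.validator.SparkDBToDBComparison")
                .setMaster("yarn")                                         // placeholder: cluster master
                .setConf("spark.submit.deployMode", "cluster")             // placeholder: deploy mode
                .setVerbose(true)                                          // pass --verbose to spark-submit
                .redirectOutput(new File("/tmp/spark-launcher-out.log"))   // placeholder log paths
                .redirectError(new File("/tmp/spark-launcher-err.log"))
                .startApplication();

        // Poll until the launcher reports a final state, then print what it saw.
        while (!handle.getState().isFinal()) {
            Thread.sleep(1000);
        }
        System.out.println("Final state: " + handle.getState() + ", appId: " + handle.getAppId());
    }
}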