I have a piece of code that runs one job at a time, importing data from Oracle and loading it into HDFS. At the moment only a single job can run at once. Is there a way to improve performance by executing two jobs at the same time, and can the number of jobs to run be made configurable based on the available resources?
def runETL(aInput: Any): Unit = {
  try {
    // Copy a trigger file to HDFS to launch the full load Job (90 tables) and the incremental load Job (10 tables)
    copyBytesToFile("Oracle DB refreshed", ConfigFactory.load.getString("oracle.status.file.path"))
    var start = System.currentTimeMillis
    val maxlastActionDteTime: String = EventDAO.getMaxLastActionDate
    start = System.currentTimeMillis
    EventDAO.loadStagingTables(maxlastActionDteTime, ConfigFactory.load.getInt("facets.oracledb"),
      ConfigFactory.load.getInt("facets.oracledb"), ConfigFactory.load.getInt("facets.oracledb"))
  } catch {
    case e: Exception =>
      logger.error(getStackTraceAsString(e))
      MailUtil.sendMail("ETLScheduler Error -> " + e.getMessage)
      throw new RuntimeException("runETL was terminated due to an Exception.")
  }
  val sparkAppHandle = new SparkLauncher()
    .setSparkHome(ConfigFactory.load.getString("spark.home.location"))
    .setAppResource(ConfigFactory.load.getString("spark.resource.jar.location"))
    .setMainClass("com.s.PIDriver")
    .setMaster("yarn-cluster")
    .setConf("spark.executor.memory", ConfigFactory.load.getString("spark.conf.executor.memory"))
    .setConf("spark.executor.instances", ConfigFactory.load.getString("spark.conf.executor.instances"))
    .setConf("spark.executor.cores", ConfigFactory.load.getString("spark.conf.executor.cores"))
    .setConf("spark.yarn.queue", ConfigFactory.load.getString("spark.conf.queue"))
    .setConf("spark.driver.memory", ConfigFactory.load.getString("spark.conf.driver.memory"))
    .startApplication()
  sparkAppHandle.addListener(new SparkAppHandle.Listener() {
    // Called whenever the application moves between states (CONNECTED, SUBMITTED, RUNNING, FINISHED, FAILED or KILLED)
    override def stateChanged(handle: SparkAppHandle): Unit = {
      val appState = handle.getState
      if (appState.isFinal) {
        // Copy a trigger file to HDFS to launch the table load Job only if the App finished and was neither failed nor killed.
        if (appState == SparkAppHandle.State.FINISHED) {
          // Compute stats and invalidate metadata
          val start = System.currentTimeMillis
          EventDAO.updateTableMetadata
          logger.info("Hive and Impala metadata refreshed in " + (System.currentTimeMillis - start) / 1000 + " seconds..so copying the _refresh.done file")
          copyBytesToFile("ETL Spark Job and Metadata updates completed", ConfigFactory.load.getString(".status.file.path"))
          logger.info("************** ETL Spark Job and Metadata updates were completed successfully... **************")
        } else if (appState == SparkAppHandle.State.KILLED || appState == SparkAppHandle.State.FAILED || appState == SparkAppHandle.State.UNKNOWN) {
          MailUtil.sendMail("Job did not complete, Application finished with status " + appState)
          throw new RuntimeException("Job did not complete, Application finished with status " + appState)
        }
      }
    }

    override def infoChanged(handle: SparkAppHandle): Unit = {}
  })
}
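
One way to get more than one of these loads running at the same time is to submit each SparkLauncher application from a fixed-size thread pool whose size is read from configuration, so the degree of parallelism can be tuned to whatever resources the cluster has free. The sketch below is only an outline under assumptions: the config key etl.max.concurrent.jobs, the job names passed via addAppArgs, and the idea of splitting the load into independent table groups are hypothetical and not part of the code above. Each submission blocks on a CountDownLatch until its application reaches a final state.

import java.util.concurrent.{CountDownLatch, Executors}
import scala.concurrent.{Await, ExecutionContext, Future}
import scala.concurrent.duration.Duration
import com.typesafe.config.ConfigFactory
import org.apache.spark.launcher.{SparkAppHandle, SparkLauncher}

object ConcurrentETL {

  // Hypothetical config key: how many Spark applications may run at once.
  private val maxConcurrentJobs = ConfigFactory.load.getInt("etl.max.concurrent.jobs")

  // A fixed-size pool caps how many launcher submissions are in flight in parallel.
  private implicit val ec: ExecutionContext =
    ExecutionContext.fromExecutorService(Executors.newFixedThreadPool(maxConcurrentJobs))

  // Launches one Spark application and blocks until it reaches a final state.
  def runJob(jobName: String): SparkAppHandle.State = {
    val done = new CountDownLatch(1)
    val handle = new SparkLauncher()
      .setSparkHome(ConfigFactory.load.getString("spark.home.location"))
      .setAppResource(ConfigFactory.load.getString("spark.resource.jar.location"))
      .setMainClass("com.s.PIDriver")
      .setMaster("yarn-cluster")
      .addAppArgs(jobName) // hypothetical argument telling the driver which table group to load
      .startApplication(new SparkAppHandle.Listener {
        override def stateChanged(h: SparkAppHandle): Unit =
          if (h.getState.isFinal) done.countDown()
        override def infoChanged(h: SparkAppHandle): Unit = ()
      })
    done.await() // returns once the app is FINISHED, FAILED or KILLED
    handle.getState
  }

  def main(args: Array[String]): Unit = {
    // Two independent loads submitted concurrently; the pool size bounds the actual parallelism.
    val jobs = Seq("full-load", "incremental-load").map(name => Future(runJob(name)))
    jobs.foreach(job => Await.ready(job, Duration.Inf))
  }
}

Whatever pool size is chosen, the YARN queue still has to accommodate that many applications at once: spark.executor.instances and spark.executor.memory per job, multiplied by the number of concurrent jobs, should fit within the queue's capacity, otherwise YARN simply holds the extra applications until resources free up.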