我正在尝试在Spring Web应用程序中使用Spark Launcher库以编程方式提交Spark作业。
在 `yarn-client`、`yarn-cluster` 和 `standalone-client` 模式下一切正常。但是,在使用 `standalone-cluster` 模式时,`SparkAppHandle` 的 `getState()` 永远停留在 `UNKNOWN` 状态。有什么建议吗?谢谢。
这是服务的代码
import org.apache.spark.launcher.SparkAppHandle;
import org.apache.spark.launcher.SparkLauncher;
import org.springframework.stereotype.Service;
/**
 * Submits a Spark application through {@link SparkLauncher} and blocks the calling
 * thread until the returned {@link SparkAppHandle} reports a final state.
 */
@Service
public class SparkServices {

    /** Pause between two consecutive state polls, in milliseconds. */
    private static final long POLL_INTERVAL_MS = 10000L;

    /**
     * Launches the application and waits for it to finish.
     *
     * <p>NOTE(review): the wait loop has no deadline. If the handle never reaches a
     * final state (for example if it stays {@code UNKNOWN}), this method never
     * returns; consider adding a timeout at the call site.
     *
     * @param master value handed to {@link SparkLauncher#setMaster(String)}
     * @param mode value handed to {@link SparkLauncher#setDeployMode(String)}
     * @return the text {@code "finished with >>>"} followed by the final state
     * @throws Exception if the launcher fails to start the application or the
     *     waiting thread is interrupted while sleeping
     */
    public String launchJob(String master, String mode) throws Exception {
        SparkAppHandle handle = new SparkLauncher()
                .setAppName("test1")
                .setSparkHome("/usr/local/spark")
                .setAppResource("hdfs://nn:9000/spark-application.jar")
                .setMainClass("my.App")
                .setMaster(master)
                .setDeployMode(mode)
                .setConf("spark.executor.instances", "2")
                .setConf("spark.driver.memory", "2g")
                .setConf("spark.driver.cores", "1")
                .setConf("spark.executor.memory", "2g")
                .setConf("spark.executor.cores", "1")
                .addAppArgs("hdfs://nn:9000/spark-project/files/")
                .setVerbose(true)
                .startApplication(new LoggingListener());

        // Poll until the launcher reports a terminal state, echoing the current
        // state on every iteration so progress stays visible on stdout.
        while (!handle.getState().isFinal()) {
            System.out.println("state >>> " + handle.getState());
            Thread.sleep(POLL_INTERVAL_MS);
        }
        return "finished with >>>" + handle.getState();
    }

    /**
     * Listener that echoes every handle notification to stdout.
     *
     * <p>Declared as a static nested class so it keeps no reference to the
     * enclosing service instance.
     */
    private static final class LoggingListener implements SparkAppHandle.Listener {

        /** Prints the new state whenever the handle's state changes. */
        @Override
        public void stateChanged(SparkAppHandle sparkAppHandle) {
            System.out.println("state >>> " + sparkAppHandle.getState());
        }

        /**
         * Prints the application ID whenever the handle's info changes. The state is
         * already reported by {@link #stateChanged(SparkAppHandle)}; the ID is the
         * piece of information this callback adds.
         */
        @Override
        public void infoChanged(SparkAppHandle sparkAppHandle) {
            System.out.println("info >>> " + sparkAppHandle.getAppId());
        }
    }
}
还有控制器的代码
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RestController;
/**
 * REST controller exposing an endpoint that starts a Spark job by delegating to
 * {@link SparkServices}.
 */
@RestController
public class TaskController {

    /** Service that performs the submission; populated by Spring via {@code @Autowired}. */
    @Autowired
    private SparkServices sparkServices;

    /**
     * Handles {@code GET /sparkJobs/{master}/{mode}} by forwarding both path
     * variables to {@link SparkServices#launchJob(String, String)}.
     *
     * @param master the {@code master} path segment
     * @param mode the {@code mode} path segment
     * @return whatever string the service returns, used as the response body
     * @throws Exception propagated unchanged from the service call
     */
    @GetMapping("/sparkJobs/{master}/{mode}")
    public String sparkJob(
            @PathVariable("master") String master,
            @PathVariable("mode") String mode) throws Exception {
        final String outcome = sparkServices.launchJob(master, mode);
        return outcome;
    }
}