在同一个JVM中检测到多个运行的SparkContexts - Java Spark

时间:2016-11-21 00:24:09

标签: java mongodb scala apache-spark

我正在尝试使用JavaSparkContext来阅读MongoDB集合。所以我有以下实用程序:

public class SparkUtil {

    private String host;

    private Integer port;

    private String database;

    public final static Logger log = Logger.getLogger( SparkUtil.class );

    private static final String SPARK_MONGO_INPUT_URI = "spark.mongodb.input.uri";
    private static final String SPARK_MONGO_OUTPUT_URI = "spark.mongodb.output.uri";

    private static SparkConf conf;
    private static JavaSparkContext jsc;


    /**
     * 
     * @param master
     * @param appname
     * @param inputCollection
     * @param outputCollection
     */
    public SparkUtil(final String host, final Integer port, final String database,
                     final String master, final String appname, final String inputCollection,
                     final String outputCollection) {
        try {
            this.host = host;
            this.port = port;
            this.database = database;
            String inputURI = this.formatMongoURI(inputCollection);
            String outputURI = this.formatMongoURI(outputCollection);
            log.info("----------------------------------------------------");
            log.info("Mongo Input URI: " + inputURI);
            log.info("Mongo Output URI: " + outputURI);
            log.info("----------------------------------------------------");
            conf = new SparkConf()
                            .setMaster(master)
                            .setAppName(appname)
                            .set(SPARK_MONGO_INPUT_URI, inputURI)
                            .set(SPARK_MONGO_OUTPUT_URI, outputURI)
                            .set("spark.driver.allowMultipleContexts", "true");
            SparkContext sc = new SparkContext(conf);
            jsc = JavaSparkContext.fromSparkContext(sc);
        } catch (Exception ex) {
            log.error(ex.getMessage());
        }
    }
 }

但是,我收到以下错误:

16:19:58.929 [main] DEBUG org.spark_project.jetty.util.component.AbstractLifeCycle - STARTED @36813ms o.s.j.s.ServletContextHandler@de81be1{/metrics/json,null,AVAILABLE}
16:19:58.931 [main] WARN org.apache.spark.SparkContext - Multiple running SparkContexts detected in the same JVM!
org.apache.spark.SparkException: Only one SparkContext may be running in this JVM (see SPARK-2243). To ignore this error, set spark.driver.allowMultipleContexts = true. The currently running SparkContext was created at:
org.apache.spark.SparkContext.<init>(SparkContext.scala:77)
cloudos.utils.SparkUtil.<init>(SparkUtil.java:65)
utils.SparkUtilTest.setUp(SparkUtilTest.java:47)
sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
java.lang.reflect.Method.invoke(Method.java:498)
org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:24)
org.springframework.test.context.junit4.statements.RunBeforeTestMethodCallbacks.evaluate(RunBeforeTestMethodCallbacks.java:75)
org.springframework.test.context.junit4.statements.RunAfterTestMethodCallbacks.evaluate(RunAfterTestMethodCallbacks.java:86)
org.springframework.test.context.junit4.statements.SpringRepeat.evaluate(SpringRepeat.java:84)
org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
org.springframework.test.context.junit4.SpringJUnit4ClassRunner.runChild(SpringJUnit4ClassRunner.java:252)
org.springframework.test.context.junit4.SpringJUnit4ClassRunner.runChild(SpringJUnit4ClassRunner.java:94)
org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
    at org.apache.spark.SparkContext$$anonfun$assertNoOtherContextIsRunning$2.apply(SparkContext.scala:2223)
    at org.apache.spark.SparkContext$$anonfun$assertNoOtherContextIsRunning$2.apply(SparkContext.scala:2219)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.SparkContext$.assertNoOtherContextIsRunning(SparkContext.scala:2219)
    at org.apache.spark.SparkContext$.setActiveContext(SparkContext.scala:2305)
    at org.apache.spark.SparkContext.<init>(SparkContext.scala:2175)
    at cloudos.utils.SparkUtil.<init>(SparkUtil.java:65)
    at utils.SparkUtilTest.setUp(SparkUtilTest.java:47)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
    at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
    at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
    at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:24)
    at org.springframework.test.context.junit4.statements.RunBeforeTestMethodCallbacks.evaluate(RunBeforeTestMethodCallbacks.java:75)
    at org.springframework.test.context.junit4.statements.RunAfterTestMethodCallbacks.evaluate(RunAfterTestMethodCallbacks.java:86)
    at org.springframework.test.context.junit4.statements.SpringRepeat.evaluate(SpringRepeat.java:84)
    at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
    at org.springframework.test.context.junit4.SpringJUnit4ClassRunner.runChild(SpringJUnit4ClassRunner.java:252)
    at org.springframework.test.context.junit4.SpringJUnit4ClassRunner.runChild(SpringJUnit4ClassRunner.java:94)
    at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
    at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
    at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
    at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
    at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
    at org.springframework.test.context.junit4.statements.RunBeforeTestClassCallbacks.evaluate(RunBeforeTestClassCallbacks.java:61)
    at org.springframework.test.context.junit4.statements.RunAfterTestClassCallbacks.evaluate(RunAfterTestClassCallbacks.java:70)
    at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
    at org.springframework.test.context.junit4.SpringJUnit4ClassRunner.run(SpringJUnit4ClassRunner.java:191)
    at org.junit.runner.JUnitCore.run(JUnitCore.java:137)
    at org.junit.runner.JUnitCore.run(JUnitCore.java:115)
    at org.testng.junit.JUnit4TestRunner.start(JUnit4TestRunner.java:81)
    at org.testng.junit.JUnit4TestRunner.run(JUnit4TestRunner.java:69)
    at org.testng.TestRunner$1.run(TestRunner.java:689)
    at org.testng.TestRunner.runWorkers(TestRunner.java:1014)
    at org.testng.TestRunner.privateRunJUnit(TestRunner.java:720)
    at org.testng.TestRunner.run(TestRunner.java:621)
    at org.testng.SuiteRunner.runTest(SuiteRunner.java:359)
    at org.testng.SuiteRunner.runSequentially(SuiteRunner.java:354)
    at org.testng.SuiteRunner.privateRun(SuiteRunner.java:312)
    at org.testng.SuiteRunner.run(SuiteRunner.java:261)
    at org.testng.SuiteRunnerWorker.runSuite(SuiteRunnerWorker.java:52)
    at org.testng.SuiteRunnerWorker.run(SuiteRunnerWorker.java:86)
    at org.testng.TestNG.runSuitesSequentially(TestNG.java:1191)
    at org.testng.TestNG.runSuitesLocally(TestNG.java:1116)
    at org.testng.TestNG.run(TestNG.java:1024)
    at org.apache.maven.surefire.testng.TestNGExecutor.run(TestNGExecutor.java:115)
    at org.apache.maven.surefire.testng.TestNGDirectoryTestSuite.executeSingleClass(TestNGDirectoryTestSuite.java:129)
    at org.apache.maven.surefire.testng.TestNGDirectoryTestSuite.execute(TestNGDirectoryTestSuite.java:113)
    at org.apache.maven.surefire.testng.TestNGProvider.invoke(TestNGProvider.java:111)
    at org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:203)
    at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:155)
    at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:103)
16:19:58.932 [main] DEBUG org.springframework.test.context.cache.DefaultCacheAwareContextLoaderDelegate - Retrieved ApplicationContext from cache with key [[WebMergedContextConfiguration@1fa121e2 testClass = SparkUtilTest, locations = '{}', classes = '{class utils.SparkUtilTest$ContextConfiguration}', contextInitializerClasses = '[]', activeProfiles = '{}', propertySourceLocations = '{classpath:test.properties}', propertySourceProperties = '{org.springframework.boot.test.context.SpringBootTestContextBootstrapper=true}', contextCustomizers = set[org.springframework.boot.test.context.SpringBootTestContextCustomizer@6aba2b86, org.springframework.boot.test.context.filter.ExcludeFilterContextCustomizer@47af7f3d, org.springframework.boot.test.mock.mockito.MockitoContextCustomizer@0, org.springframework.boot.test.autoconfigure.properties.PropertyMappingContextCustomizer@0], resourceBasePath = 'src/main/webapp', contextLoader = 'org.springframework.test.context.support.AnnotationConfigContextLoader', parent = [null]]]
16:19:58.932 [main] DEBUG org.springframework.test.context.cache - Spring test ApplicationContext cache statistics: [DefaultContextCache@4b7dc788 size = 1, maxSize = 32, parentContextCount = 0, hitCount = 12, missCount = 1]

我正在运行以下Unit Test

@RunWith(SpringRunner.class)
@SpringBootTest
@TestPropertySource(value="classpath:test.properties")
@ContextConfiguration(loader = AnnotationConfigContextLoader.class)
public class SparkUtilTest {

    public final static Logger log = Logger.getLogger( SparkUtilTest.class );

    private SparkUtil sparkUtil;

    @Value("${spring.data.mongodb.host}")
    private String host;

    @Value("${spring.data.mongodb.port}")
    private Integer port;

    @Value("${spring.data.mongodb.database}")
    private String database;

    @Configuration
    @TestPropertySource(value="classpath:test.properties")
    static class ContextConfiguration {
    }

    @Before
    public void setUp() throws Exception {
        this.sparkUtil = new SparkUtil(this.host, this.port, this.database, "local", 
                                       "AmazonML", "aws_instances",  "aws_instances");
    }

    @Test
    public void testGetMethods() {
        assertNotNull(this.sparkUtil.getJavaSparkContext());
        assertNotNull(this.sparkUtil.getSparkConfig());
    }

    @Test
    public void testRead() {
        JavaRDD<Document> rdd = this.sparkUtil.read();
        assertNotNull(rdd);
        assertNotEquals(rdd.count(), 0);
        log.info("-------------------------------------");
        log.info("Count: " + rdd.count());
        log.info("Object: " + rdd.first().toJson());
        log.info("-------------------------------------");
    }

}

我使用了set("spark.driver.allowMultipleContexts", "true"),但没有任何区别。 我正在使用scala 2.12mongo-spark-connector_2.11spark-sql_2.11Spark 1.63。我该如何解决我的问题?

1 个答案:

答案 0 :(得分:1)

我认为这是由setUp()方法引起的,该方法使用@Before注释,因此在每个@Test之前调用它。在这种情况下,它被调用两次,因为你有两个测试,这就是创建两个SparkContexts的原因。

最简单的解决方案似乎是将@Before替换为@BeforeClass,但这只会暂时解决问题(直到您添加更多带有测试的类)。但是,你现在可以试试看它是否有帮助。

您还可以在每次测试后尝试stop ping SparkContext(如果您将@After更改为@AfterClass,则使用@Before@BeforeClass 。我认为无论如何,当它不再需要时,你应该stop,这是一种很好的做法。

另一种解决方案是确保每个JVM只创建一次SparkUtil。你可以简单地使用singleton pattern,但是你没有简单的方法来阻止SparkContext。也许Spring Boot提供了一种更好的初始化和清理机制?