如何在项目中使用spark的子模块(例如spark_core)?

时间:2019-05-16 12:13:53

标签: scala maven apache-spark pom.xml multi-module

我知道我可以将jar添加到我的项目中并且可以使用它,但这是我不想做的事情。我需要使用在github上的spark_parent_2.12中拥有的核心模块的源代码。

我能够从spark中提取核心项目并将其添加到我的项目中,因为这里的依赖项是我的pom.xml。

项目的pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-parent_2.12</artifactId>
    <version>3.0.0-SNAPSHOT</version>

    <modules>
        <module>core</module>
    </modules>

    <properties>
        <sbt.project.name>core</sbt.project.name>
        <sbt.project.name>core</sbt.project.name><avro.mapred.classifier>hadoop2</avro.mapred.classifier>
        <javaxservlet.version>3.1.0</javaxservlet.version>
        <scala.binary.version>2.12</scala.binary.version>
        <oro.version>2.0.8</oro.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <scala.version>2.12.8</scala.version>
        <scala.plugin.version>3.2.2</scala.plugin.version>
        <scoverage.plugin.version>1.3.0</scoverage.plugin.version>
        <project-info-reports.plugin.version>3.0.0</project-info-reports.plugin.version>
        <json4s.version>3.6.5</json4s.version>
        <scalaxml.version>1.1.1</scalaxml.version>
    </properties>


    <dependencies>
        <dependency>
            <groupId>com.thoughtworks.paranamer</groupId>
            <artifactId>paranamer</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.avro</groupId>
            <artifactId>avro</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.avro</groupId>
            <artifactId>avro-mapred</artifactId>
            <classifier>${avro.mapred.classifier}</classifier>
        </dependency>
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
        </dependency>
        <dependency>
            <groupId>com.twitter</groupId>
            <artifactId>chill_${scala.binary.version}</artifactId>
        </dependency>
        <dependency>
            <groupId>com.twitter</groupId>
            <artifactId>chill-java</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.xbean</groupId>
            <artifactId>xbean-asm7-shaded</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-launcher_${scala.binary.version}</artifactId>
            <version>2.4.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-kvstore_${scala.binary.version}</artifactId>
            <version>2.4.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-network-common_${scala.binary.version}</artifactId>
            <version>2.4.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-network-shuffle_${scala.binary.version}</artifactId>
            <version>2.4.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-unsafe_${scala.binary.version}</artifactId>
            <version>2.4.3</version>
        </dependency>
        <dependency>
            <groupId>javax.activation</groupId>
            <artifactId>activation</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.curator</groupId>
            <artifactId>curator-recipes</artifactId>
        </dependency>
        <!-- With curator 2.12  SBT/Ivy doesn't get ZK on the build classpath.
             Explicitly declaring it as a dependency fixes this. -->
        <dependency>
            <groupId>org.apache.zookeeper</groupId>
            <artifactId>zookeeper</artifactId>
        </dependency>

        <!-- Jetty dependencies promoted to compile here so they are shaded
             and inlined into spark-core jar -->
        <dependency>
            <groupId>org.eclipse.jetty</groupId>
            <artifactId>jetty-plus</artifactId>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.eclipse.jetty</groupId>
            <artifactId>jetty-security</artifactId>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.eclipse.jetty</groupId>
            <artifactId>jetty-util</artifactId>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.eclipse.jetty</groupId>
            <artifactId>jetty-server</artifactId>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.eclipse.jetty</groupId>
            <artifactId>jetty-http</artifactId>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.eclipse.jetty</groupId>
            <artifactId>jetty-continuation</artifactId>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.eclipse.jetty</groupId>
            <artifactId>jetty-servlet</artifactId>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.eclipse.jetty</groupId>
            <artifactId>jetty-proxy</artifactId>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.eclipse.jetty</groupId>
            <artifactId>jetty-client</artifactId>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.eclipse.jetty</groupId>
            <artifactId>jetty-servlets</artifactId>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>javax.servlet</groupId>
            <artifactId>javax.servlet-api</artifactId>
            <version>${javaxservlet.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-math3</artifactId>
        </dependency>
        <dependency>
            <groupId>com.google.code.findbugs</groupId>
            <artifactId>jsr305</artifactId>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>jul-to-slf4j</artifactId>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>jcl-over-slf4j</artifactId>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
        </dependency>
        <dependency>
            <groupId>com.ning</groupId>
            <artifactId>compress-lzf</artifactId>
        </dependency>
        <dependency>
            <groupId>org.xerial.snappy</groupId>
            <artifactId>snappy-java</artifactId>
        </dependency>
        <dependency>
            <groupId>org.lz4</groupId>
            <artifactId>lz4-java</artifactId>
        </dependency>
        <dependency>
            <groupId>com.github.luben</groupId>
            <artifactId>zstd-jni</artifactId>
        </dependency>
        <dependency>
            <groupId>org.roaringbitmap</groupId>
            <artifactId>RoaringBitmap</artifactId>
        </dependency>
        <dependency>
            <groupId>commons-net</groupId>
            <artifactId>commons-net</artifactId>
        </dependency>

        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-reflect</artifactId>
        </dependency>
        <dependency>
            <groupId>org.json4s</groupId>
            <artifactId>json4s-jackson_${scala.binary.version}</artifactId>
        </dependency>
        <dependency>
            <groupId>org.glassfish.jersey.core</groupId>
            <artifactId>jersey-client</artifactId>
        </dependency>
        <dependency>
            <groupId>org.glassfish.jersey.core</groupId>
            <artifactId>jersey-common</artifactId>
        </dependency>
        <dependency>
            <groupId>org.glassfish.jersey.core</groupId>
            <artifactId>jersey-server</artifactId>
        </dependency>
        <dependency>
            <groupId>org.glassfish.jersey.containers</groupId>
            <artifactId>jersey-container-servlet</artifactId>
        </dependency>
        <dependency>
            <groupId>org.glassfish.jersey.containers</groupId>
            <artifactId>jersey-container-servlet-core</artifactId>
        </dependency>
        <dependency>
            <groupId>io.netty</groupId>
            <artifactId>netty-all</artifactId>
        </dependency>
        <dependency>
            <groupId>io.netty</groupId>
            <artifactId>netty</artifactId>
        </dependency>
        <dependency>
            <groupId>com.clearspring.analytics</groupId>
            <artifactId>stream</artifactId>
        </dependency>
        <dependency>
            <groupId>io.dropwizard.metrics</groupId>
            <artifactId>metrics-core</artifactId>
        </dependency>
        <dependency>
            <groupId>io.dropwizard.metrics</groupId>
            <artifactId>metrics-jvm</artifactId>
        </dependency>
        <dependency>
            <groupId>io.dropwizard.metrics</groupId>
            <artifactId>metrics-json</artifactId>
        </dependency>
        <dependency>
            <groupId>io.dropwizard.metrics</groupId>
            <artifactId>metrics-graphite</artifactId>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.module</groupId>
            <artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.derby</groupId>
            <artifactId>derby</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.ivy</groupId>
            <artifactId>ivy</artifactId>
        </dependency>
        <dependency>
            <groupId>oro</groupId>
            <!-- oro is needed by ivy, but only listed as an optional dependency, so we include it. -->
            <artifactId>oro</artifactId>
            <version>${oro.version}</version>
        </dependency>
        <dependency>
            <groupId>org.seleniumhq.selenium</groupId>
            <artifactId>selenium-java</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.seleniumhq.selenium</groupId>
            <artifactId>selenium-htmlunit-driver</artifactId>
            <scope>test</scope>
        </dependency>
        <!-- Added for selenium: -->
        <dependency>
            <groupId>xml-apis</groupId>
            <artifactId>xml-apis</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.hamcrest</groupId>
            <artifactId>hamcrest-core</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.hamcrest</groupId>
            <artifactId>hamcrest-library</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.mockito</groupId>
            <artifactId>mockito-core</artifactId>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.curator</groupId>
            <artifactId>curator-test</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>net.razorvine</groupId>
            <artifactId>pyrolite</artifactId>
            <version>4.23</version>
            <exclusions>
                <exclusion>
                    <groupId>net.razorvine</groupId>
                    <artifactId>serpent</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>net.sf.py4j</groupId>
            <artifactId>py4j</artifactId>
            <version>0.10.8.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-tags_${scala.binary.version}</artifactId>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-launcher_${scala.binary.version}</artifactId>
            <version>2.4.3</version>
            <classifier>tests</classifier>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-network-shuffle_${scala.binary.version}</artifactId>
            <version>2.4.3</version>
            <classifier>tests</classifier>
            <scope>test</scope>
        </dependency>

        <!--
          This spark-tags test-dep is needed even though it isn't used in this module, otherwise testing-cmds that exclude
          them will yield errors.
        -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-tags_${scala.binary.version}</artifactId>
            <type>test-jar</type>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-crypto</artifactId>
        </dependency>


        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.12</artifactId>
            <version>3.0.0-SNAPSHOT</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.scalatest</groupId>
            <artifactId>scalatest_2.12</artifactId>
            <version>3.2.0-SNAP10</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.scalacheck</groupId>
            <artifactId>scalacheck_2.12</artifactId>
            <version>1.14.0</version>
            <scope>test</scope>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.json4s/json4s-ast -->
        <dependency>
            <groupId>org.json4s</groupId>
            <artifactId>json4s-ast_2.12</artifactId>
            <version>${json4s.version}</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.scala-lang.modules/scala-xml -->
        <dependency>
            <groupId>org.scala-lang.modules</groupId>
            <artifactId>scala-xml_2.12</artifactId>
            <version>${scalaxml.version}</version>
        </dependency>

    </dependencies>
    <build>
        <sourceDirectory>
            ${project.basedir}/src/main/scala
        </sourceDirectory>
        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.1.3</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                            <goal>add-source</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.1.1</version>
                <executions>
                    <execution>
                        <phase>install</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                    </execution>
                </executions>
                <configuration>
                    <filters>
                        <filter>
                            <artifact>*:*</artifact>
                            <excludes>
                                <exclude>META-INF/*.SF</exclude>
                                <exclude>META-INF/*.DSA</exclude>
                                <exclude>META-INF/*.RSA</exclude>
                            </excludes>
                        </filter>
                    </filters>
                    <finalName>${project.artifactId}-${project.version}</finalName>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.scoverage</groupId>
                <artifactId>scoverage-maven-plugin</artifactId>
                <version>${scoverage.plugin.version}</version>
                <configuration>
                    <scalaVersion>${scala.version}</scalaVersion>
                    <aggregate>true</aggregate>
                    <highlighting>true</highlighting>
                </configuration>
                <executions>
                    <execution>
                        <goals>
                            <goal>report</goal> <!-- or integration-check -->
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

    <reporting>
        <plugins>
            <plugin>
                <groupId>org.scoverage</groupId>
                <artifactId>scoverage-maven-plugin</artifactId>
                <version>${scoverage.plugin.version}</version>
                <reportSets>
                    <reportSet>
                        <reports>
                            <report>report</report>
                            <!-- or <report>integration-report</report> -->
                            <!-- or <report>report-only</report> -->
                        </reports>
                    </reportSet>
                </reportSets>
            </plugin>
        </plugins>
    </reporting>

</project>

spark_core的pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<!--
  ~ Licensed to the Apache Software Foundation (ASF) under one or more
  ~ contributor license agreements.  See the NOTICE file distributed with
  ~ this work for additional information regarding copyright ownership.
  ~ The ASF licenses this file to You under the Apache License, Version 2.0
  ~ (the "License"); you may not use this file except in compliance with
  ~ the License.  You may obtain a copy of the License at
  ~
  ~    http://www.apache.org/licenses/LICENSE-2.0
  ~
  ~ Unless required by applicable law or agreed to in writing, software
  ~ distributed under the License is distributed on an "AS IS" BASIS,
  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  ~ See the License for the specific language governing permissions and
  ~ limitations under the License.
  -->

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-parent_2.12</artifactId>
    <version>3.0.0-SNAPSHOT</version>
    <relativePath>../pom.xml</relativePath>
  </parent>

  <artifactId>spark-core_2.12</artifactId>

  <packaging>jar</packaging>
  <name>Spark Project Core</name>
  <url>http://spark.apache.org/</url>

  <build>
    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
    <resources>
      <resource>
        <directory>${project.basedir}/src/main/resources</directory>
      </resource>
      <resource>
        <!-- Include the properties file to provide the build information. -->
        <directory>${project.build.directory}/extra-resources</directory>
        <filtering>true</filtering>
      </resource>
    </resources>
    <sourceDirectory>
      ${project.basedir}/src/main/scala
    </sourceDirectory>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-antrun-plugin</artifactId>
        <executions>
          <execution>
            <phase>generate-resources</phase>
            <configuration>
              <!-- Execute the shell script to generate the spark build information. -->
              <target>
                <exec executable="bash">
                  <arg value="${project.basedir}/../build/spark-build-info"/>
                  <arg value="${project.build.directory}/extra-resources"/>
                  <arg value="${project.version}"/>
                </exec>
              </target>
            </configuration>
            <goals>
              <goal>run</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-dependency-plugin</artifactId>
        <executions>
          <!-- When using SPARK_PREPEND_CLASSES Spark classes compiled locally don't use
               shaded deps. So here we store jars in their original form which are added
               when the classpath is computed. -->
          <!-- See similar execution in mllib/pom.xml -->
          <execution>
            <id>copy-dependencies</id>
            <phase>package</phase>
            <goals>
              <goal>copy-dependencies</goal>
            </goals>
           <configuration>
              <outputDirectory>${project.build.directory}</outputDirectory>
              <overWriteReleases>false</overWriteReleases>
              <overWriteSnapshots>false</overWriteSnapshots>
              <overWriteIfNewer>true</overWriteIfNewer>
              <useSubDirectoryPerType>true</useSubDirectoryPerType>
              <includeArtifactIds>
                guava,jetty-io,jetty-servlet,jetty-servlets,jetty-continuation,jetty-http,jetty-plus,jetty-util,jetty-server,jetty-security,jetty-proxy,jetty-client
              </includeArtifactIds>
              <silent>true</silent>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>

  <profiles>
    <profile>
      <id>Windows</id>
      <activation>
        <os>
          <family>Windows</family>
        </os>
      </activation>
      <properties>
        <script.extension>.bat</script.extension>
      </properties>
    </profile>
    <profile>
      <id>unix</id>
      <activation>
        <os>
          <family>unix</family>
        </os>
      </activation>
      <properties>
        <script.extension>.sh</script.extension>
      </properties>
    </profile>
    <profile>
      <id>sparkr</id>
      <build>
        <plugins>
          <plugin>
            <groupId>org.codehaus.mojo</groupId>
            <artifactId>exec-maven-plugin</artifactId>
            <version>1.6.0</version>
            <executions>
              <execution>
                <id>sparkr-pkg</id>
                <phase>compile</phase>
                <goals>
                  <goal>exec</goal>
                </goals>
              </execution>
            </executions>
            <configuration>
              <executable>${project.basedir}${file.separator}..${file.separator}R${file.separator}install-dev${script.extension}</executable>
            </configuration>
          </plugin>
        </plugins>
      </build>
    </profile>
  </profiles>

</project>

我能够访问org.apache.spark.SparkConf或SparkContext,但我无法访问Aggregator.scala文件中的org.apache.spark.annotation.DeveloperApi。

任何帮助将不胜感激。

1 个答案:

答案 0 :(得分:0)

您应该添加spark-tags依赖项:

<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-tags_2.12</artifactId>
  <version>2.4.3</version>
</dependency>