无法在Dataflow SDK 2.1.0中创建可执行Jar

时间:2017-12-08 08:15:15

标签: java maven jar google-cloud-dataflow

我使用以下pom文件创建了Jar。

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>XXXXXXXXXXXXX</groupId>
<artifactId>XXXXXXXXXXXX</artifactId>
<version>XXXXXXXXXXXXX</version>
<packaging>jar</packaging>

<!-- Build-wide properties.
     project.build.sourceEncoding: character encoding declared for the sources.
     java.version: referenced as ${java.version} by the maven-compiler-plugin
     source and target settings in the build section.
     dependency.locations.enabled: set to false; presumably read by
     maven-project-info-reports-plugin to skip resolving dependency
     locations (TODO confirm). -->
<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <java.version>1.8</java.version>
    <dependency.locations.enabled>false</dependency.locations.enabled>
</properties>

<!-- Build plugins: compile the sources, assemble a single executable jar that
     contains the project and its dependencies, and attach the JaCoCo agent.

     NOTE(review): the reported failure lists only DirectRunner as a supported
     runner. Beam appears to discover runners through META-INF/services
     registration files; if this assembly keeps only one copy of a services
     file that several dependency jars provide, the Dataflow runner
     registration could be lost. Confirm by inspecting META-INF/services
     inside the assembled jar, and consider maven-shade-plugin with
     ServicesResourceTransformer, which merges those files. -->
<build>
    <plugins>
        <!-- Java compiler: source and target levels come from ${java.version};
             every lint category is enabled except annotation processing. -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.1</version>
            <configuration>
                <source>${java.version}</source>
                <target>${java.version}</target>
                <compilerArgs>
                    <arg>-Xlint:all</arg>
                    <arg>-Xlint:-processing</arg>
                </compilerArgs>
            </configuration>
        </plugin>
        <!-- Assembly: produces the artifact with the jar-with-dependencies
             suffix that is later started with java -jar. -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-assembly-plugin</artifactId>
            <version>2.4.1</version>
            <configuration>
                <!-- Predefined descriptor: bundle the project together with its dependencies -->
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
                <!-- Set the main class in the manifest so the jar is executable -->
                <archive>
                    <manifest>
                        <mainClass>XXXXXXXXXXXXXXXX</mainClass>
                    </manifest>
                </archive>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <!-- Bind this execution to the package phase -->
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
        <!-- JaCoCo: runs the prepare-agent goal; no explicit phase is given,
             so the plugin's default binding applies. -->
        <plugin>
            <groupId>org.jacoco</groupId>
            <artifactId>jacoco-maven-plugin</artifactId>
            <version>0.7.9</version>
            <executions>
                 <execution>
                     <id>prepare-agent</id>
                     <goals>
                         <goal>prepare-agent</goal>
                     </goals>
                 </execution>
            </executions>
        </plugin>
    </plugins>
</build>

<!-- Site reporting plugins: project summary, source cross-reference (JXR),
     FindBugs, PMD and Checkstyle. Nothing in this section is bound to the
     package phase, so it presumably only matters for site generation
     (TODO confirm). -->
<reporting>
    <plugins>
        <!-- Project info reports: only the summary report is generated. -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-project-info-reports-plugin</artifactId>
            <version>2.9</version>
            <reportSets>
                <reportSet>
                    <reports>
                        <report>summary</report>
                    </reports>
                </reportSet>
            </reportSets>
        </plugin>

        <!-- JXR with its default configuration. -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-jxr-plugin</artifactId>
            <version>2.5</version>
        </plugin>

        <!-- FindBugs: maximum effort, low threshold, XML output enabled. -->
        <plugin>
            <groupId>org.codehaus.mojo</groupId>
            <artifactId>findbugs-maven-plugin</artifactId>
            <version>3.0.4</version>
            <!--<version>3.0.5-SNAPSHOT</version> -->
            <configuration>
                <effort>Max</effort>
                <threshold>Low</threshold>
                <xmlOutput>true</xmlOutput>
            </configuration>
        </plugin>

        <!-- PMD: restricted to the basic and design Java rulesets. -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-pmd-plugin</artifactId>
            <version>3.7</version>
            <configuration>
                <rulesets>
                    <ruleset>/rulesets/java/basic.xml</ruleset>
                    <ruleset>/rulesets/java/design.xml</ruleset>
                </rulesets>
            </configuration>
        </plugin>

        <!-- Checkstyle: only the checkstyle report is generated. -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-checkstyle-plugin</artifactId>
            <version>2.17</version>
            <reportSets>
                <reportSet>
                    <reports>
                        <report>checkstyle</report>
                    </reports>
                </reportSet>
            </reportSets>
        </plugin>

    </plugins>
</reporting>

<!-- Project dependencies. Only mockito-all declares a scope (test); every
     other entry has no scope element, so Maven's default scope applies and
     those artifacts are presumably bundled into the assembled jar
     (TODO confirm against the jar contents). -->
<dependencies>
    <!-- Google Cloud Dataflow SDK 2.1.0, the SDK the pipeline is built against. -->
    <dependency>
        <groupId>com.google.cloud.dataflow</groupId>
        <artifactId>google-cloud-dataflow-java-sdk-all</artifactId>
        <version>2.1.0</version>
    </dependency>

    <!-- Google Cloud Storage client library. -->
    <dependency>
        <groupId>com.google.cloud</groupId>
        <artifactId>google-cloud-storage</artifactId>
        <version>1.2.0</version>
    </dependency>

    <!-- Servlet API. -->
    <dependency>
        <groupId>javax.servlet</groupId>
        <artifactId>javax.servlet-api</artifactId>
        <version>3.1.0</version>
    </dependency>

    <!-- JMockit mocking library.
         NOTE(review): no test scope is declared here, unlike mockito-all
         below; verify whether that is intentional. -->
    <dependency>
        <groupId>org.jmockit</groupId>
        <artifactId>jmockit</artifactId>
        <version>1.30</version>
    </dependency>

    <!-- Mockito, restricted to the test classpath. -->
    <dependency>
        <groupId>org.mockito</groupId>
        <artifactId>mockito-all</artifactId>
        <version>1.10.19</version>
        <scope>test</scope>
    </dependency>

    <!-- Hamcrest and JUnit are kept outside the test scope because
         DataflowAssert, which the main code of the DebuggingWordCount example
         uses, requires them. -->

    <dependency>
        <groupId>org.hamcrest</groupId>
        <artifactId>hamcrest-all</artifactId>
        <version>1.3</version>
    </dependency>

    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
    </dependency>

    <!-- Apache Commons Lang 2.x utilities. -->
    <dependency>
        <groupId>commons-lang</groupId>
        <artifactId>commons-lang</artifactId>
        <version>2.6</version>
    </dependency>
    <!-- Logback classic logging backend. -->
    <dependency>
        <groupId>ch.qos.logback</groupId>
        <artifactId>logback-classic</artifactId>
        <version>1.0.7</version>
    </dependency>
</dependencies>

maven命令

# Remove the output of any previous build.
mvn clean
# Compile the main sources.
mvn compile
# Run the lifecycle through the package phase, where the make-assembly
# execution of maven-assembly-plugin is bound.
mvn package

Jar命令执行

 # Launch the assembled jar as ONE command: each trailing backslash continues
 # the command on the next line, so every option reaches the same JVM.
 java -jar XXXXXX-jar-with-dependencies.jar \
      --runner=DataflowRunner \
      --jobName=XXXXXXXXXX \
      --project=XXXXXXXXXX \
      --network=XXXXXXXXXX \
      --subnetwork=XXXXXXXXXXXX \
      --workerMachineType=XXXXXXX \
      --region=XXXXXXXXXX \
      --maxNumWorkers=XXX \
      --stagingLocation=XXXXXXXXXX \
      --tempLocation=XXXXXXXXXXXX \
      --dataflowJobFile=XXXXXXXXXX

发生以下错误

java.lang.IllegalArgumentException: Unknown 'runner' specified 'DataflowRunner', supported pipeline runners [DirectRunner]
at org.apache.beam.sdk.options.PipelineOptionsFactory.parseObjects(PipelineOptionsFactory.java:1615)
at org.apache.beam.sdk.options.PipelineOptionsFactory.access$400(PipelineOptionsFactory.java:104)
at org.apache.beam.sdk.options.PipelineOptionsFactory$Builder.as(PipelineOptionsFactory.java:291)
Caused by: java.lang.ClassNotFoundException: DataflowRunner
at java.net.URLClassLoader.findClass(URLClassLoader.java:381) 
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:264)

作为上述错误的解决方法,我们将以下内容添加到pom文件中,并仅在测试时使用DirectRunner。

    <!-- Beam direct (local) runner 2.1.0, limited to the test classpath by the
         test scope.
         NOTE(review): test-scoped artifacts are normally left out of the
         packaged jar, so this entry presumably does not change which runners
         the assembled jar can find. Confirm. -->
    <dependency>
        <groupId>org.apache.beam</groupId>
        <artifactId>beam-runners-direct-java</artifactId>
        <version>2.1.0</version>
        <scope>test</scope>
    </dependency>

使用上述解决方法执行时,发生以下错误。

java.lang.IllegalArgumentException: Unable to infer a coder and no Coder was specified. Please set a coder by invoking Create.withCoder() explicitly.
at org.apache.beam.sdk.transforms.Create$Values.expand(Create.java:318) 
at org.apache.beam.sdk.transforms.Create$Values.expand(Create.java:268)
at org.apache.beam.sdk.Pipeline.applyInternal(Pipeline.java:514)
at org.apache.beam.sdk.Pipeline.applyTransform(Pipeline.java:473)
at org.apache.beam.sdk.values.PBegin.apply(PBegin.java:56)
at org.apache.beam.sdk.Pipeline.apply(Pipeline.java:180)
at org.apache.beam.sdk.io.gcp.bigquery.BatchLoads.expand(BatchLoads.java:344)
at org.apache.beam.sdk.io.gcp.bigquery.BatchLoads.expand(BatchLoads.java:67)
at org.apache.beam.sdk.Pipeline.applyInternal(Pipeline.java:514) 
at org.apache.beam.sdk.Pipeline.applyTransform(Pipeline.java:454)
at org.apache.beam.sdk.values.PCollection.apply(PCollection.java:284)
at org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO$Write.expandTyped(BigQueryIO.java:1019)
at org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO$Write.expand(BigQueryIO.java:972)
at org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO$Write.expand(BigQueryIO.java:659)
at org.apache.beam.sdk.Pipeline.applyInternal(Pipeline.java:514)
at org.apache.beam.sdk.Pipeline.applyTransform(Pipeline.java:473)
at org.apache.beam.sdk.values.PCollection.apply(PCollection.java:297)
Caused by: org.apache.beam.sdk.coders.CannotProvideCoderException: Unable to provide a Coder for com.google.api.services.bigquery.model.TableRow.
Building a Coder using a registered CoderProvider failed.
See suppressed exceptions for detailed failures.

作为上述错误的解决方法,我们尝试调用setCoder显式设置了Coder,但结果没有变化,仍然出现同样的错误。

在SDK 1.9.0中,我们可以使用maven创建可执行的Jar。

如果在SDK 2.1.0中也可以创建可执行的Jar,能否提示一下应该如何修改pom文件以及如何运行该jar?

1 个答案:

答案 0 :(得分:1)

您是否尝试过包含beam-runners-google-cloud-dataflow-java依赖项?

https://beam.apache.org/documentation/runners/dataflow/