How do I set up the Dataflow pipeline runner locally?

Time: 2018-10-11 21:19:08

Tags: google-cloud-dataflow

I am trying to reproduce this example on my local machine using IntelliJ:

https://github.com/GoogleCloudPlatform/cloud-bigtable-examples/blob/master/java/dataflow-connector-examples/src/main/java/com/google/cloud/bigtable/dataflow/example/HelloWorldWrite.java

Here is my Java code:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

import com.google.cloud.bigtable.beam.CloudBigtableIO;
import com.google.cloud.bigtable.beam.CloudBigtableTableConfiguration;


public class BigQueryBigTableTransfer {

    private static final byte[] FAMILY = Bytes.toBytes("RC");
    private static final byte[] QUALIFIER = Bytes.toBytes("col1");

    // This is a random value so that there will be some changes in the table
    // each time the job runs.
    private static final byte[] VALUE = Bytes.toBytes("value_" + (60 * Math.random()));

    // [START bigtable_dataflow_connector_process_element]
    static final DoFn<String, Mutation> MUTATION_TRANSFORM = new DoFn<String, Mutation>() {
        private static final long serialVersionUID = 1L;

        @ProcessElement
        public void processElement(DoFn<String, Mutation>.ProcessContext c) throws Exception {
            c.output(new Put(c.element().getBytes()).addColumn(FAMILY, QUALIFIER, VALUE));
        }
    };
    // [END bigtable_dataflow_connector_process_element]

    // Main method for Testing
    public static void main(String[] args) {
        System.out.println("Write to Big Table pipeline started !! ");
        // [START bigtable_dataflow_connector_create_pipeline]
        PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();

        Pipeline p = Pipeline.create(options);
        // [END bigtable_dataflow_connector_create_pipeline]

        String PROJECT_ID = "my-project";
        String INSTANCE_ID = "my-instance";
        String TABLE_ID = "my-table";

        // [START bigtable_dataflow_connector_config]
        CloudBigtableTableConfiguration config =
                new CloudBigtableTableConfiguration.Builder()
                        .withProjectId(PROJECT_ID)
                        .withInstanceId(INSTANCE_ID)
                        .withTableId(TABLE_ID)
                        .build();
        // [END bigtable_dataflow_connector_config]

        // [START bigtable_dataflow_connector_write_helloworld]

        System.out.println("Pipeline create MUTATION TRX begin .... ");

        p.apply(Create.of("Hello", "World"))
                .apply(ParDo.of(MUTATION_TRANSFORM))
                .apply(CloudBigtableIO.writeToTable(config));

        System.out.println("Pipeline run machine begin .... ");

        p.run().waitUntilFinish();

        System.out.println("Write to Big Table pipeline ended!! ");



    }


}

Here is my pom.xml:

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org</groupId>
    <artifactId>google-dataflow-test</artifactId>
    <version>beta</version>

    <dependencies>
        <dependency>
            <groupId>com.google.cloud.dataflow</groupId>
            <artifactId>google-cloud-dataflow-java-sdk-all</artifactId>
            <version>2.0.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.beam</groupId>
            <artifactId>beam-runners-direct-java</artifactId>
            <version>2.4.0</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>com.google.cloud.bigtable</groupId>
            <artifactId>bigtable-hbase-beam</artifactId>
            <version>1.5.0</version>
        </dependency>

    </dependencies>

</project>

When I run the code, I hit the following problem:

Exception in thread "main" java.lang.IllegalArgumentException: No Runner was specified and the DirectRunner was not found on the classpath.
Specify a runner by either:
    Explicitly specifying a runner by providing the 'runner' property
    Adding the DirectRunner to the classpath
    Calling 'PipelineOptions.setRunner(PipelineRunner)' directly
    at org.apache.beam.sdk.options.PipelineOptions$DirectRunner.create(PipelineOptions.java:285)
    at org.apache.beam.sdk.options.PipelineOptions$DirectRunner.create(PipelineOptions.java:275)
    at org.apache.beam.sdk.options.ProxyInvocationHandler.returnDefaultHelper(ProxyInvocationHandler.java:575)
    at org.apache.beam.sdk.options.ProxyInvocationHandler.getDefault(ProxyInvocationHandler.java:516)
    at org.apache.beam.sdk.options.ProxyInvocationHandler.invoke(ProxyInvocationHandler.java:154)
    at org.apache.beam.sdk.options.PipelineOptionsValidator.validate(PipelineOptionsValidator.java:70)
    at org.apache.beam.sdk.PipelineRunner.fromOptions(PipelineRunner.java:41)
    at org.apache.beam.sdk.Pipeline.create(Pipeline.java:141)

How do I specify my runner when running locally?
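For reference, the error message itself lists the two usual ways to do this. Here is a minimal sketch of both (the class name RunnerSetupSketch is hypothetical), assuming beam-runners-direct-java is on the main classpath rather than test scope:

import org.apache.beam.runners.direct.DirectRunner;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class RunnerSetupSketch {
    public static void main(String[] args) {
        // Option 1: pass the runner as a program argument, e.g. in the IntelliJ
        // run configuration under Run > Edit Configurations... > Program arguments:
        //     --runner=DirectRunner
        PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();

        // Option 2: set the runner explicitly in code instead.
        options.setRunner(DirectRunner.class);

        Pipeline p = Pipeline.create(options);
        p.run().waitUntilFinish();
    }
}

Note that both options still require the DirectRunner class on the classpath at runtime; with the <scope>test</scope> shown in the POM above it is only visible to tests, which by itself would produce exactly this exception.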

0 Answers:

No answers yet.