无法在 Dataflow 上更新 Apache Beam 管道

时间:2018-03-02 11:29:47

标签: google-cloud-dataflow apache-beam

我在 Google Cloud Dataflow 上运行一个 Apache Beam 管道。但是，即使使用完全相同的代码，也无法更新该管道。管道的结构如“pipeline overview”（管道概览图）所示。代码如下：

import com.google.common.collect.Iterables;
import com.google.common.primitives.Ints;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.GenerateSequence;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.View;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionView;
import org.joda.time.Duration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Minimal reproduction pipeline: a rate-limited sequence of longs flows through two chained
 * {@code ParDo} steps ("Map1" then "Map2") that both read the same singleton side input.
 *
 * <p>Every transform is applied with an explicit, stable name ("Create", "CreateSideInput",
 * "FakeData", "Map1", "Map2") so that successive submissions of this class produce identically
 * named steps.
 */
public class PipelineTest {
  private static final Logger logger = LoggerFactory.getLogger(PipelineTest.class);

  /** Number of integers placed in the side-input iterable. */
  private static final int SIDE_INPUT_SIZE = 1000;

  /**
   * Builds the pipeline and submits it to the runner selected by the command-line options.
   *
   * @param args Beam pipeline options in {@code --name=value} form (for example {@code --runner},
   *     {@code --project}, {@code --jobName}, {@code --update}); may be empty, in which case the
   *     default options are used
   */
  public static void main(String[] args) {
    // Side-input payload: the squares 0, 1, 4, ..., (SIDE_INPUT_SIZE - 1)^2.
    int[] squares = new int[SIDE_INPUT_SIZE];
    for (int i = 0; i < squares.length; i++) {
      squares[i] = i * i;
    }

    // Parse the command line so runner-specific flags actually reach the pipeline; with no
    // arguments this yields the same default options as PipelineOptionsFactory.create().
    PipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().create();

    Pipeline pipeline = Pipeline.create(options);

    // A PCollection holding exactly one element: the whole list of squares as an Iterable.
    PCollection<Iterable<Integer>> sideInput =
        pipeline.apply("Create", Create.<Iterable<Integer>>of(Ints.asList(squares)));

    // Expose that single element as a side input shared by both ParDo steps below.
    PCollectionView<Iterable<Integer>> view =
        sideInput.apply("CreateSideInput", View.asSingleton());

    pipeline
        .apply(
            "FakeData",
            GenerateSequence.from(0).to(50_000).withRate(10, Duration.standardSeconds(1)))
        .apply(
            "Map1",
            ParDo.of(
                    new DoFn<Long, String>() {

                      /** Tags each number with the size of the side-input iterable. */
                      @ProcessElement
                      public void processElement(ProcessContext ctx) {
                        Long element = ctx.element();

                        Iterable<Integer> v = ctx.sideInput(view);

                        String out = "element " + element + ", value " + Iterables.size(v);

                        logger.info("MAP1: {}", out);

                        ctx.output(out);
                      }
                    })
                .withSideInputs(view))
        .apply(
            "Map2",
            ParDo.of(
                    new DoFn<String, String>() {

                      /** Wraps the Map1 output again, reading the same side input. */
                      @ProcessElement
                      public void processElement(ProcessContext ctx) {
                        String element = ctx.element();

                        Iterable<Integer> v = ctx.sideInput(view);

                        String out = "element " + element + ", value " + Iterables.size(v);

                        logger.info("MAP2: {}", out);

                        ctx.output(out);
                      }
                    })
                .withSideInputs(view));

    // Without this call the graph is only constructed and never submitted to a runner.
    pipeline.run();
  }
}

我尝试过为视图提供默认值，也尝试过使用两个视图，但这两种做法都不起作用。如果该视图被用于两个相互独立的转换，则可以更新管道。

0 个答案:

没有答案