我在 Google Cloud Dataflow 上运行一个 Apache Beam 管道。但是，即使使用完全相同的代码，也无法更新该管道。管道的结构如“pipeline overview”图所示。代码如下：
import com.google.common.collect.Iterables;
import com.google.common.primitives.Ints;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.GenerateSequence;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.View;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionView;
import org.joda.time.Duration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Small reproduction pipeline in which a single singleton side input is read by two consecutive
 * {@code ParDo} steps ("Map1" followed by "Map2").
 *
 * <p>The side input holds the squares of the integers 0..999. The main input is a rate-limited
 * {@link GenerateSequence}. Each step logs and emits a string containing its input element and the
 * number of values found in the side input.
 */
public class PipelineTest {
  private static final Logger logger = LoggerFactory.getLogger(PipelineTest.class);

  /**
   * Builds the pipeline and submits it to the runner selected by the pipeline options.
   *
   * @param args command-line pipeline options, parsed by {@link PipelineOptionsFactory}
   */
  public static void main(String[] args) {
    // Payload of the side input: i * i for every i in [0, 1000).
    int[] squares = new int[1000];
    for (int i = 0; i < squares.length; i++) {
      squares[i] = i * i;
    }

    // Parse the command line instead of ignoring it, so that runner-specific options can
    // actually be supplied when the job is launched.
    PipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().create();
    Pipeline pipeline = Pipeline.create(options);

    // One element whose value is the whole list of squares; exposed as a singleton view.
    PCollection<Iterable<Integer>> sideInput =
        pipeline.apply("Create", Create.<Iterable<Integer>>of(Ints.asList(squares)));
    PCollectionView<Iterable<Integer>> view =
        sideInput.apply("CreateSideInput", View.asSingleton());

    pipeline
        .apply(
            "FakeData",
            GenerateSequence.from(0).to(50_000).withRate(10, Duration.standardSeconds(1)))
        .apply(
            "Map1",
            ParDo.of(
                    new DoFn<Long, String>() {
                      @ProcessElement
                      public void processElement(ProcessContext ctx) {
                        Long element = ctx.element();
                        Iterable<Integer> v = ctx.sideInput(view);
                        String out = "element " + element + ", value " + Iterables.size(v);
                        logger.info("MAP1: {}", out);
                        ctx.output(out);
                      }
                    })
                .withSideInputs(view))
        // Second step reads the very same view as the first one.
        .apply(
            "Map2",
            ParDo.of(
                    new DoFn<String, String>() {
                      @ProcessElement
                      public void processElement(ProcessContext ctx) {
                        String element = ctx.element();
                        Iterable<Integer> v = ctx.sideInput(view);
                        String out = "element " + element + ", value " + Iterables.size(v);
                        logger.info("MAP2: {}", out);
                        ctx.output(out);
                      }
                    })
                .withSideInputs(view));

    // Without this call the graph is only constructed and never executed.
    pipeline.run();
  }
}
我尝试过为视图提供默认值，也尝试过使用两个视图，但这两种做法都不起作用。如果该视图是在两个相互独立的转换中使用，则管道可以更新。