我在 Flink 中运行自己编写的 BFS,代码如下。我有 16 台机器(每台 96 GB 内存),每个 TaskManager 配置 20 个任务槽。当我把并行度设置为 80 时,程序会在 join 步骤中停滞不前。
public class T2AdjMessage {
private static final Integer STARTLEVEL = 1;
private static Integer STARTPOINT = 2;
private static String graphfile;
public static void main(String[] args) throws Exception{
ParameterTool params = ParameterTool.fromArgs(args);
graphfile = params.get("file");
STARTPOINT = params.getInt("start", 2);
final int maxIterations = params.getInt("iterations", 20);
// get environment
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(params.getInt("para",200));
图表阅读
//EdgeInput
DataSet<Tuple2<Integer, Integer>> edgesInput = env.readTextFile(graphfile).map(
new MapFunction<String, Tuple2<Integer, Integer>>() {
@Override
public Tuple2<Integer, Integer> map(String s) throws Exception {
String[] parts = s.split("\t");
if (parts.length != 2) {
throw new Exception("Input file error");
}
return new Tuple2<>(Integer.parseInt(parts[0].trim()), Integer.parseInt(parts[1].trim()));
}
}
).partitionByHash(0);
DataSet<Tuple2<Integer, Iterable<Integer>>> adjlist = edgesInput.groupBy(0).reduceGroup(
new GroupReduceFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Iterable<Integer>>>() {
@Override
public void reduce(Iterable<Tuple2<Integer, Integer>> values, Collector<Tuple2<Integer, Iterable<Integer>>> out)
throws Exception {
ArrayList<Integer> al = new ArrayList<>();
int source = 0;
for(Tuple2<Integer,Integer> i : values){
al.add(i.f1);
source = i.f0;
}
out.collect(new Tuple2<Integer, Iterable<Integer>>(source, al));
}
}
);
//Vertex Input
DataSet<Tuple2<Integer, Integer>> vertexWithLevel = adjlist.flatMap(
new FlatMapFunction<Tuple2<Integer, Iterable<Integer>>, Tuple2<Integer, Integer>>() {
@Override
public void flatMap(Tuple2<Integer, Iterable<Integer>> value, Collector<Tuple2<Integer, Integer>> out)
throws Exception {
Integer u = value.f0;
out.collect(new Tuple2<Integer, Integer>(u, Integer.MAX_VALUE));
for (Integer i : value.f1){
out.collect(new Tuple2<Integer, Integer>(i, Integer.MAX_VALUE));
}
}
}
).groupBy(0).reduce(
new ReduceFunction<Tuple2<Integer, Integer>>() {
@Override
public Tuple2<Integer, Integer> reduce(Tuple2<Integer, Integer> value1, Tuple2<Integer, Integer> value2)
throws Exception {
return value1.f1 < value2.f1 ? value1:value2;
}
}
).partitionByHash(0);
DataSet<Tuple2<Integer, Integer>> pointInQ = env.fromElements(new Tuple2<Integer, Integer>(STARTPOINT,STARTLEVEL));
迭代开始
final DeltaIteration< Tuple2<Integer, Integer>, Tuple2<Integer, Integer> > iterationDelta =
vertexWithLevel.iterateDelta(pointInQ, maxIterations,0);
setUpIteration(iterationDelta, env);
DataSet<Tuple2<Integer,Integer>> activeV = iterationDelta.getSolutionSet().coGroup(iterationDelta.getWorkset())
.where(0).equalTo(0).with(
new CoGroupFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>, Tuple2<Integer, Integer>>() {
@Override
public void coGroup(Iterable<Tuple2<Integer, Integer>> first, Iterable<Tuple2<Integer, Integer>> second,
Collector<Tuple2<Integer, Integer>> out) throws Exception {
Tuple2<Integer,Integer> x = first.iterator().next();
if(x != null){
int level = x.f1;
for (Tuple2<Integer, Integer> i : second){
if(level > i.f1){
level = i.f1;
}
}
if(level < x.f1){
out.collect(new Tuple2<Integer, Integer>(x.f0, level));
}
}
}
}
);
//程序将停留在加入步骤
// adjcent list join activeV produce message
DataSet<Tuple2<Integer,Integer>> message = adjlist.join(activeV).where(0).equalTo(0).with(
new RichFlatJoinFunction<Tuple2<Integer, Iterable<Integer>>, Tuple2<Integer, Integer>, Tuple2<Integer,
Integer>>() {
@Override
public void join(Tuple2<Integer, Iterable<Integer>> first, Tuple2<Integer, Integer> second,
Collector<Tuple2<Integer, Integer>> out) {
for (Integer x : first.f1) {
out.collect(new Tuple2<Integer, Integer>(x, second.f1 + 1));
}
}
}
);
DataSet<Tuple2<Integer, Integer>> finalVertexLevel = iterationDelta.closeWith(activeV, message);
if (params.has("output")) {
finalVertexLevel.writeAsCsv(params.get("output"), System.lineSeparator(), " ", OVERWRITE);
env.execute("Join Edgeset");
} else {
System.out.println("Printing result to stdout. Use --output to specify output path.");
}
}
private static void setUpIteration(DeltaIteration<?,?> iteration, ExecutionEnvironment env){
iteration.parallelism(env.getParallelism());
iteration.name("Join Edgeset");
}
}