Hadoop multiple jobs - it does not exit - requires Ctrl+C

Time: 2014-04-22 08:35:52

Tags: hadoop

I am trying to run multiple jobs, and they run fine. The problem arises when the third job finishes executing: it produces the expected output, but the application does not exit. Every time I have to press Ctrl+C to quit. Here is my main method:

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    String[] otherArgs = new GenericOptionsParser(conf, args)
            .getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: app <in> <out>");
        System.exit(2);
    }
    // first job
    ControlledJob cjob1 = new ControlledJob(conf);
    cjob1.setJobName("First Job");
    Job job1 = cjob1.getJob();

    job1.setJarByClass(MultipleJobs.class);
    job1.setMapperClass(Mapper1.class);
    job1.setReducerClass(Reducer1.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(NullWritable.class);
    job1.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job1, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job1, new Path("temp1"));

    // second job
    ControlledJob cjob2 = new ControlledJob(conf);
    cjob2.setJobName("SecondJob");
    cjob2.addDependingJob(cjob1); 
    Job job2 = cjob2.getJob();

    job2.setJarByClass(MultipleJobs.class);
    job2.setMapperClass(Mapper2.class);
    job2.setCombinerClass(Reducer2.class);
    job2.setReducerClass(Reducer2.class);
    job2.setMapOutputKeyClass(Text.class);
    job2.setMapOutputValueClass(IntWritable.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job2, new Path("temp1"));
    FileOutputFormat.setOutputPath(job2, new Path("temp2"));

    // third job
    ControlledJob cjob3 = new ControlledJob(conf);
    cjob3.setJobName("Third Job");
    cjob3.addDependingJob(cjob2); 
    Job job3 = cjob3.getJob();

    job3.setJarByClass(MultipleJobs.class);
    job3.setReducerClass(Reducer3.class);
    job3.setMapperClass(Mapper3.class);
    job3.setMapOutputKeyClass(NullWritable.class);
    job3.setMapOutputValueClass(Text.class);
    job3.setOutputKeyClass(Text.class);
    job3.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job3, new Path("temp2"));
    FileOutputFormat.setOutputPath(job3, new Path(otherArgs[1]));

    JobControl control = new JobControl("Controller");
    control.addJob(cjob1);
    control.addJob(cjob2);
    control.addJob(cjob3);

    control.run();
}

And the launch command:

 hadoop jar MJ.jar MultipleJobs input output

Is this the correct way to chain multiple jobs? What should I add or change to avoid having to press Ctrl+C at the end of the whole execution?

1 answer:

Answer 0 (score: 2)

Yes, you can chain multiple jobs like this. Check this

To avoid the Ctrl+C at the end of the run, you can do the following:

/* Entire configuration for job1 */
job1.waitForCompletion(true);

/* Entire configuration for job2 */
job2.waitForCompletion(true);

/* Entire configuration for job3 */
// the return works as written inside an int-returning run() method (e.g. Tool.run());
// from main(), pass this value to System.exit(...) instead
return job3.waitForCompletion(true) ? 0 : 1;
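
With waitForCompletion(true), each call blocks until that job has finished (the true flag also prints progress to the console), so the three jobs run one after another and the process exits on its own once the last call returns.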

Update:

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    String[] otherArgs = new GenericOptionsParser(conf, args)
            .getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: app <in> <out>");
        System.exit(2);
    }
    // first job
    Job job1 = new Job(conf, "job1");
    job1.setJarByClass(MultipleJobs.class);
    job1.setMapperClass(Mapper1.class);
    job1.setReducerClass(Reducer1.class);
    job1.setMapOutputKeyClass(Text.class);
    job1.setMapOutputValueClass(Text.class);
    job1.setOutputKeyClass(NullWritable.class);
    job1.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job1, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job1, new Path("temp1"));
    job1.waitForCompletion(true);

    // second job
    Configuration conf2 = new Configuration();
    Job job2 = new Job(conf2, "job2");
    job2.setJarByClass(MultipleJobs.class);
    job2.setMapperClass(Mapper2.class);
    job2.setCombinerClass(Reducer2.class);
    job2.setReducerClass(Reducer2.class);
    job2.setMapOutputKeyClass(Text.class);
    job2.setMapOutputValueClass(IntWritable.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job2, new Path("temp1"));
    FileOutputFormat.setOutputPath(job2, new Path("temp2"));
    job2.waitForCompletion(true);

    // third job
    Configuration conf3 = new Configuration();
    Job job3 = new Job(conf3, "job3");
    job3.setJarByClass(MultipleJobs.class);
    job3.setReducerClass(Reducer3.class);
    job3.setMapperClass(Mapper3.class);
    job3.setMapOutputKeyClass(NullWritable.class);
    job3.setMapOutputValueClass(Text.class);
    job3.setOutputKeyClass(Text.class);
    job3.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job3, new Path("temp2"));
    FileOutputFormat.setOutputPath(job3, new Path(otherArgs[1]));

    // main() cannot return a value, so exit with the last job's status instead
    System.exit(job3.waitForCompletion(true) ? 0 : 1);
}
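
If you would rather keep the JobControl/ControlledJob chain from the question, note that control.run() keeps polling until stop() is called, which is why the process never exits on its own. A minimal sketch (not part of the original answer, assuming the same cjob1/cjob2/cjob3 objects from the question) is to run the controller in a background thread and stop it once allFinished() reports true:

    JobControl control = new JobControl("Controller");
    control.addJob(cjob1);
    control.addJob(cjob2);
    control.addJob(cjob3);

    // JobControl implements Runnable, so it can be driven from its own thread
    Thread controllerThread = new Thread(control);
    controllerThread.setDaemon(true);
    controllerThread.start();

    // poll until every controlled job has finished (successfully or not)
    while (!control.allFinished()) {
        Thread.sleep(1000);
    }
    control.stop(); // lets the controller thread's run() return so the JVM can exit

    System.exit(control.getFailedJobList().isEmpty() ? 0 : 1);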