将代码从hadoop 0.20.2迁移到hadoop 2.4.1

时间:2014-10-21 10:33:50

标签: java hadoop mapreduce

我正在尝试从 Hadoop 0.20.2 的 Java 代码迁移到 Hadoop 2.4.1。我使用的是 mapreduce API。当我试图运行该作业时，收到以下错误：java.io.IOException: Cannot initialize Cluster.（无法初始化 Cluster。）请检查您的配置项 mapreduce.framework.name 以及相应的服务器地址。

我的项目是一个java项目,我试图简单地执行它。 (当我用0.20.2时它起作用了) 以下是代码示例:

public class HadoopTool extends Configured implements Tool {
@Override
public int run(String[] args) throws Exception {
    // Load the user-supplied properties file, derive the Hadoop
    // configuration from it, then configure and run the MapReduce job.
    PropertiesConfiguration props = getConfigurationProperties(args);
    Configuration conf = getJobConfiguration(props);
    Job job = Job.getInstance(conf);
    jobSetup(props, job);

    // Block until the job finishes; translate success into exit code 0.
    boolean succeeded = job.waitForCompletion(true);
    if (succeeded) {
        System.out.println("Finish hadoop tool successfully");
        return 0;
    }
    System.out.println("Error in hadoop tool");
    return 1;
}

/**
 * Configures the MapReduce job: job name, input/output paths and formats,
 * the map/shuffle/reduce classes, and the output key/value types.
 *
 * NOTE(review): the {@code = ;} initializers below are placeholders the
 * author elided from the post; the real values presumably come from
 * {@code props} — confirm against the original project.
 *
 * NOTE(review): {@code Partitioner.class} and {@code Reducer.class} look
 * like the Hadoop base classes rather than project implementations —
 * verify these are the intended classes.
 *
 * @param props user-supplied job properties
 * @param job   the job instance to configure
 * @throws IOException            if the input paths cannot be added
 * @throws ClassNotFoundException if the configured output-format class
 *                                cannot be loaded by name
 */
private void jobSetup(PropertiesConfiguration props, Job job) throws IOException, ClassNotFoundException {
    job.setJobName("Hadoop tool");
    String inputDir = ;//input dir (placeholder elided by the author)
    FileInputFormat.addInputPaths(job, inputDir);
    String outputDir = ; //outputdir (placeholder elided by the author)
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    // Force a single reduce task.
    job.setNumReduceTasks(1);
    job.setJarByClass(HadoopTool.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(HadoopMapper.class);
    job.setMapOutputKeyClass(IdNamePair.class);
    job.setMapOutputValueClass(Text.class);
    job.setGroupingComparatorClass(Grouper.class);
    job.setPartitionerClass(Partitioner.class);
    job.setReducerClass(Reducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NameValuePair.class);
    String outputClassName = ; // my output class (placeholder elided by the author)
    // Output format is resolved by name at runtime; the cast below is
    // unchecked and will fail at job setup if the class is not an
    // OutputFormat.
    Class<?> outputClass = Class.forName(outputClassName);
    job.setOutputFormatClass((Class<? extends OutputFormat>) outputClass);
}

/**
 * Builds the Hadoop {@link Configuration} used to submit the job,
 * starting from the tool's base configuration ({@code getConf()}).
 *
 * NOTE(review): the "Cannot initialize Cluster" IOException reported
 * against Hadoop 2.x is typically caused by the
 * hadoop-mapreduce-client-core / -common / -jobclient jars missing from
 * the runtime classpath (or a misconfigured
 * {@code mapreduce.framework.name}), not by this method — verify the
 * project's dependencies were updated along with the code.
 *
 * @param props user-supplied job properties (not read here)
 * @return the configuration to submit the job with
 * @throws IllegalAccessException declared for callers; not thrown here
 * @throws InstantiationException declared for callers; not thrown here
 * @throws ClassNotFoundException declared for callers; not thrown here
 * @throws IOException            declared for callers; not thrown here
 */
private Configuration getJobConfiguration(PropertiesConfiguration props) throws IllegalAccessException,
        InstantiationException, ClassNotFoundException, IOException {
    Configuration conf = getConf();
    // when running on Windows, must do that before FileSystem is first
    // acquired
    if (org.apache.lucene.util.Constants.WINDOWS) {
        conf.set("fs.file.impl", IgnoreSetPermissionsFileSystem.class.getName());
    }
    conf.setBoolean("mapred.mapper.new-api", true);
    // Hadoop 2.x property names; the old mapred.*.speculative.execution
    // forms are deprecated aliases since the 0.20/1.x API.
    conf.setBoolean("mapreduce.map.speculative", false);
    conf.setBoolean("mapreduce.reduce.speculative", false);
    return conf;
}

/**
 * Loads the job's properties file, whose path is the first command-line
 * argument.
 *
 * @param args command-line arguments; {@code args[0]} is the properties
 *             file path
 * @return the loaded properties
 * @throws FileNotFoundException  if {@code args[0]} does not exist
 * @throws ConfigurationException if the file cannot be parsed
 * @throws IOException            if closing the stream fails
 */
private PropertiesConfiguration getConfigurationProperties(String[] args) throws FileNotFoundException,
        ConfigurationException, IOException {
    final PropertiesConfiguration props = new PropertiesConfiguration();
    // Always close the stream, even when parsing fails.
    final InputStream stream = new FileInputStream(args[0]);
    try {
        props.load(stream);
    } finally {
        stream.close();
    }
    return props;
}

/**
 * Runs the tool through Hadoop's {@code ToolRunner} with a fresh
 * configuration.
 *
 * @param args command-line arguments forwarded to {@link #run(String[])}
 * @return the tool's exit code (0 on success, 1 on failure)
 * @throws Exception if job setup or execution fails
 */
public static int runHadoopTool(String[] args) throws Exception {
    return ToolRunner.run(new Configuration(), new HadoopTool(), args);
}

/**
 * Entry point: validates that a config-file argument was given, then
 * runs the tool and exits with its result code.
 *
 * @param args command-line arguments; {@code args[0]} is the properties
 *             file path
 * @throws Exception if the tool fails
 */
public static void main(String[] args) throws Exception {
    if (args.length == 0) {
        System.err.println("Usage: " + HadoopTool.class.getName() + " [config file]");
        System.exit(1);
    }
    System.exit(HadoopTool.runHadoopTool(args));
}

我真的很感激帮助。 谢谢

谢伊

0 个答案:

没有答案