Problem setting a boolean parameter in the Hadoop configuration

Date: 2014-04-07 13:05:04

Tags: hadoop

I have the following code.

This is the driver; this is where I set the boolean parameter caseSensitive.

package stubs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class AvgWordLength extends Configured implements Tool{

  public static void main(String[] args) throws Exception {
      int exitCode = ToolRunner.run(new Configuration(), new AvgWordLength(), args);
      System.exit(exitCode);
  }

@Override
public int run(String[] args) throws Exception {
    // TODO Auto-generated method stub
    /*
     * Validate that two arguments were passed from the command line.
     */
    if (args.length != 2) {
      System.out.printf("Usage: AvgWordLength <input dir> <output dir>\n");
      return -1;
    }

    /*
     * Instantiate a Job object for your job's configuration. 
     */
    Job job = new Job(getConf());
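    // The boolean parameter "caseSensitive" is set here (the line the question is about)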
    getConf().setBoolean("caseSensitive",true);
    /*
     * Specify the jar file that contains your driver, mapper, and reducer.
     * Hadoop will transfer this jar file to nodes in your cluster running 
     * mapper and reducer tasks.
     */
    job.setJarByClass(AvgWordLength.class);

    /*
     * Specify an easily-decipherable name for the job.
     * This job name will appear in reports and logs.
     */
    job.setJobName("Average Word Length");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LetterMapper.class);
    job.setReducerClass(AverageReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    /*
     * TODO implement
     */

    /*
     * Start the MapReduce job and wait for it to finish.
     * If it finishes successfully, return 0. If not, return 1.
     */
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
}

In the mapper I use the parameter:

package stubs;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class LetterMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

  boolean isCaseSensitive;
  @Override
  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    System.out.println("isCaseSensitive:"+isCaseSensitive);
    String line = value.toString();
    for (String word: line.split("\\W+")) {
        if(word.length()>0) {
            String key1 = isCaseSensitive ? word.substring(0, 1) : word.substring(0,1).toUpperCase();
            context.write(new Text(key1), new IntWritable(word.length()));
        }
    }

  }

@Override
protected void setup(Context context)
        throws IOException, InterruptedException {
    // TODO Auto-generated method stub
    super.setup(context);
    Configuration conf = context.getConfiguration();
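    // Read the flag from the job configuration; default to false if it is not set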
    isCaseSensitive = conf.getBoolean("caseSensitive", false);
}
}

However, this does not work, and I do not understand why

getConf().setBoolean("caseSensitive",true);

has no effect.

1 Answer:

Answer 0 (score: 0):

I found the answer.

Here is the updated driver code:

package stubs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class AvgWordLength extends Configured implements Tool{

  public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
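      // Set the flag before the Configuration is handed to ToolRunner and, from there, to the Job constructor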
      conf.setBoolean("caseSensitive",true);
      int exitCode = ToolRunner.run(conf, new AvgWordLength(), args);
      System.exit(exitCode);
  }

@Override
public int run(String[] args) throws Exception {
    // TODO Auto-generated method stub
    /*
     * Validate that two arguments were passed from the command line.
     */
    if (args.length != 2) {
      System.out.printf("Usage: AvgWordLength <input dir> <output dir>\n");
      return -1;
    }

    /*
     * Instantiate a Job object for your job's configuration. 
     */
    Job job = new Job(getConf());

    /*
     * Specify the jar file that contains your driver, mapper, and reducer.
     * Hadoop will transfer this jar file to nodes in your cluster running 
     * mapper and reducer tasks.
     */
    job.setJarByClass(AvgWordLength.class);

    /*
     * Specify an easily-decipherable name for the job.
     * This job name will appear in reports and logs.
     */
    job.setJobName("Average Word Length");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LetterMapper.class);
    job.setReducerClass(AverageReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    /*
     * TODO implement
     */

    /*
     * Start the MapReduce job and wait for it to finish.
     * If it finishes successfully, return 0. If not, return 1.
     */
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
}

The job's configuration must be complete before it is passed to the Job constructor, because the constructor takes a copy of the Configuration. Any change made to the original Configuration after that point is not picked up by the job.
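
An equivalent fix, shown here only as a minimal sketch (not part of the original answer), is to leave the call in run() but apply it to the configuration the Job actually uses: Job.getConfiguration() returns the job's own copy, so values set on it before submission are serialized with the job and visible in the mapper's setup().

    Job job = new Job(getConf());
    // Modify the job's own Configuration copy, not the driver's getConf(),
    // so the value travels with the submitted job.
    job.getConfiguration().setBoolean("caseSensitive", true);

And because the driver runs through ToolRunner, the flag could also be supplied at launch time with the generic -D option (parsed by GenericOptionsParser before run() is called). The jar name below is only illustrative:

    hadoop jar avgwordlength.jar stubs.AvgWordLength -D caseSensitive=true <input dir> <output dir>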