I have the following code. This is the driver, where I set the boolean parameter caseSensitive:
package stubs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class AvgWordLength extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Configuration(), new AvgWordLength(), args);
        System.exit(exitCode);
    }

    @Override
    public int run(String[] args) throws Exception {
        /*
         * Validate that two arguments were passed from the command line.
         */
        if (args.length != 2) {
            System.out.printf("Usage: AvgWordLength <input dir> <output dir>\n");
            return -1;
        }

        /*
         * Instantiate a Job object for your job's configuration.
         */
        Job job = new Job(getConf());
        getConf().setBoolean("caseSensitive", true);

        /*
         * Specify the jar file that contains your driver, mapper, and reducer.
         * Hadoop will transfer this jar file to nodes in your cluster running
         * mapper and reducer tasks.
         */
        job.setJarByClass(AvgWordLength.class);

        /*
         * Specify an easily-decipherable name for the job.
         * This job name will appear in reports and logs.
         */
        job.setJobName("Average Word Length");

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(LetterMapper.class);
        job.setReducerClass(AverageReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        /*
         * Start the MapReduce job and wait for it to finish.
         * If it finishes successfully, return 0. If not, return 1.
         */
        boolean success = job.waitForCompletion(true);
        return success ? 0 : 1;
    }
}
In the mapper I read the parameter:
package stubs;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class LetterMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    boolean isCaseSensitive;

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        System.out.println("isCaseSensitive:" + isCaseSensitive);
        String line = value.toString();
        for (String word : line.split("\\W+")) {
            if (word.length() > 0) {
                String key1 = isCaseSensitive ? word.substring(0, 1) : word.substring(0, 1).toUpperCase();
                context.write(new Text(key1), new IntWritable(word.length()));
            }
        }
    }

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        super.setup(context);
        Configuration conf = context.getConfiguration();
        isCaseSensitive = conf.getBoolean("caseSensitive", false);
    }
}
However, this does not work, and I don't understand why
getConf().setBoolean("caseSensitive", true);
has no effect.
Answer:
I found the answer myself. Here is the updated driver code:
package stubs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class AvgWordLength extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.setBoolean("caseSensitive", true);
        int exitCode = ToolRunner.run(conf, new AvgWordLength(), args);
        System.exit(exitCode);
    }

    @Override
    public int run(String[] args) throws Exception {
        /*
         * Validate that two arguments were passed from the command line.
         */
        if (args.length != 2) {
            System.out.printf("Usage: AvgWordLength <input dir> <output dir>\n");
            return -1;
        }

        /*
         * Instantiate a Job object for your job's configuration.
         */
        Job job = new Job(getConf());

        /*
         * Specify the jar file that contains your driver, mapper, and reducer.
         * Hadoop will transfer this jar file to nodes in your cluster running
         * mapper and reducer tasks.
         */
        job.setJarByClass(AvgWordLength.class);

        /*
         * Specify an easily-decipherable name for the job.
         * This job name will appear in reports and logs.
         */
        job.setJobName("Average Word Length");

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(LetterMapper.class);
        job.setReducerClass(AverageReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        /*
         * Start the MapReduce job and wait for it to finish.
         * If it finishes successfully, return 0. If not, return 1.
         */
        boolean success = job.waitForCompletion(true);
        return success ? 0 : 1;
    }
}
The job's configuration must be complete before the Configuration is passed to the Job constructor. The constructor copies the Configuration (wrapping it in its own JobConf), so any change made to the original Configuration after that point is not seen by the job.
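The change does not have to live in main(), though. Since the property only needs to be set before the Job is constructed, a run()-only variant also works; a minimal sketch (same class as above, everything else unchanged):

    @Override
    public int run(String[] args) throws Exception {
        if (args.length != 2) {
            System.out.printf("Usage: AvgWordLength <input dir> <output dir>\n");
            return -1;
        }

        // Set the property BEFORE creating the Job: the Job constructor
        // copies the Configuration, so later changes to getConf() are
        // never seen by the job.
        getConf().setBoolean("caseSensitive", true);

        Job job = new Job(getConf());
        job.setJarByClass(AvgWordLength.class);
        job.setJobName("Average Word Length");

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(LetterMapper.class);
        job.setReducerClass(AverageReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        boolean success = job.waitForCompletion(true);
        return success ? 0 : 1;
    }

And because the driver runs through ToolRunner, which feeds the arguments through GenericOptionsParser, the flag can also be supplied on the command line with no code change at all (the jar name here is just a placeholder):

    hadoop jar avgwordlength.jar stubs.AvgWordLength -DcaseSensitive=true <input dir> <output dir>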