As a beginner, I tried to implement a custom input format program. It works fine up to the Mapper, but my Reducer is never run; the default reducer is invoked instead, so the Mapper's output is written unchanged to the file part-r-0000.
I checked the method signatures, set the key and value classes for both phases correctly, and even set the number of reduce tasks.
I cannot figure out why the reducer is not being executed. I also checked other threads, but had no luck.
Driver
package cif;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Driver extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Driver(), args);
        System.out.println(" Program Ends :: Exit Code =" + exitCode);
    }

    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(getConf());
        job.setInputFormatClass(XmlInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setJarByClass(Driver.class);
        job.setMapperClass(CifMapper.class);
        job.setReducerClass(CifReducer.class);
        job.setCombinerClass(CifReducer.class);
        job.setNumReduceTasks(4);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        FileSystem fs = FileSystem.get(getConf());
        if (fs.exists(new Path(args[1]))) {
            fs.delete(new Path(args[1]), true);
        }
        return job.waitForCompletion(true) ? 0 : 1;
    }
}
XmlInputFormat
package cif;

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class XmlInputFormat extends FileInputFormat<Text, Text> {

    @Override
    protected boolean isSplitable(JobContext c, Path file) {
        return true;
    }

    @Override
    public RecordReader<Text, Text> createRecordReader(InputSplit split,
            TaskAttemptContext context) throws IOException, InterruptedException {
        System.out.println("Enter CreateRecord");
        XmlRecordReader reader = new XmlRecordReader();
        reader.initialize(split, context);
        return reader;
    }
}
XmlRecordReader
package cif;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

public class XmlRecordReader extends RecordReader<Text, Text> {

    public String startTag = "<Employee>";
    public String endTag = "</Employee>";
    public String eidStartTag = "<eid>";
    public String eidEndTag = "</eid>";
    public String locationStartTag = "<location>";
    public String locationEndTag = "</location>";
    public static String v = "";
    public static int startTagSync = 0;
    public Text key = new Text();
    public Text value = new Text();
    LineRecordReader lineReader;
    public LongWritable lineKey;

    public XmlRecordReader() throws IOException {
    }

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        this.lineReader = new LineRecordReader();
        this.lineReader.initialize(split, context);
    }

    @Override
    public void close() throws IOException {
        lineReader.close();
    }

    @Override
    public Text getCurrentKey() throws IOException, InterruptedException {
        //System.out.println("returning key : "+key);
        return key;
    }

    @Override
    public Text getCurrentValue() throws IOException, InterruptedException {
        //System.out.println(" Returning value :"+ value);
        return value;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        return lineReader.getProgress();
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        //System.out.println(" Enter nextKeyValue");
        if (!lineReader.nextKeyValue()) {
            System.out.println("End of File");
            return false;
        }
        String line = lineReader.getCurrentValue().toString();
        if (line.contains(startTag))
            ++startTagSync;
        while (startTagSync > 0) {
            lineReader.nextKeyValue();
            //System.out.println("key "+lineReader.getCurrentKey());
            line = lineReader.getCurrentValue().toString();
            //System.out.println(" line --"+line);
            if (line.contains(endTag))
                --startTagSync;
            if (startTagSync > 0) {
                if (line.contains(eidStartTag)) {
                    line = line.substring(eidStartTag.length() + 2);
                    int index = line.indexOf(eidEndTag);
                    v = line.substring(0, index);
                    value.set(new Text(line.substring(0, index)));
                    // System.out.println(line);
                }
                if (line.contains(locationStartTag)) {
                    line.trim();
                    line = line.substring(locationStartTag.length() + 2);
                    /// System.out.println("line :"+line);
                    int index = line.indexOf(locationEndTag);
                    //key.set(new Text(line.substring(0,index)));
                    v = line.substring(0, index);
                    key.set(new Text(v));
                }
            }
        }
        return true;
    }
}
CifMapper
package cif;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class CifMapper extends Mapper<Text, Text, Text, IntWritable> {

    public static IntWritable one = new IntWritable(1);

    public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
        //System.out.println("Entering Mapper");
        context.write(new Text(key.toString()), one);
        System.out.println("Exiting mapper ::" + key.toString() + " " + one);
    }
}
CifReducer
package cif;

import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class CifReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    public int count;

    public void reducer(Text key, Iterable<IntWritable> values, Context context) throws Exception {
        System.out.println(" entering reducer");
        count = 0;
        Iterator<IntWritable> iter = values.iterator();
        while (iter.hasNext()) {
            iter.next();
            count++;
        }
        context.write(key, new IntWritable(count));
        System.out.println(" Exiting reducer");
    }
}
Input
<Employee>
<eid>1</eid>
<location>Bangalore</location>
</Employee>
<Employee>
<eid>2</eid>
<location>Bangalore</location>
</Employee>
<Employee>
<eid>3</eid>
<location>BangaloreNorth</location>
</Employee>
<Employee>
<eid>4</eid>
<location>Chennaii</location>
</Employee>
Output
Bangalore 1
Bangalore 1
BangaloreNorth 1
Chennaii 1
Chennaii 1
Answer 0 (score 0)
Java overriding rule:
An overriding method must not throw new or broader checked exceptions.
Your reduce method declares throws Exception, i.e. it widens the checked exceptions:

public void reducer(Text key, Iterable<IntWritable> values, Context context) throws Exception {

Instead, it should be:

public void reducer(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
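As a minimal, self-contained illustration of that overriding rule (the Base/Child class and method names here are invented for the example, not taken from the question), declaring a broader checked exception on an overriding method is rejected at compile time:

import java.io.IOException;

class Base {
    void process() throws IOException { }
}

class Child extends Base {
    // Compile-time error: Exception is broader than the IOException
    // declared by Base.process(), so this method may not override it.
    @Override
    void process() throws Exception { }
}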
Answer 1 (score 0)
Really sorry for this silly mistake, but I thought posting it here might help someone.

public void reducer(Text key, Iterable<IntWritable> values, Context context) throws Exception

must be replaced with

public void reduce(Text key, Iterable<IntWritable> values, Context context) throws Exception

I was not overriding the reduce method; I had written reducer instead, so it was never called.
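For reference, a sketch of how the fixed class could look (same package and class name as in the question, behaviour otherwise unchanged). The signature now matches Reducer.reduce and carries @Override, so a misspelled method name would be caught at compile time instead of silently falling back to the identity reducer:

package cif;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class CifReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Count how many times this key (the location string) was emitted by the mappers.
        int count = 0;
        for (IntWritable ignored : values) {
            count++;
        }
        // Emit the location with its total count.
        context.write(key, new IntWritable(count));
    }
}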