Why is MapReduce emitting my key-value pairs from the mapper rather than the reducer? What could be causing this?
I'm running a MapReduce Java program on Hadoop. The mapper is set up to emit IntWritable values of 8 or 9, and the reducer is set up to emit values of 5 or 17.
What I actually end up with is output like this:
AAKASH LAKSHMANAN 9
AALIYAH HARRISON 9
AARON CARMACK 9
AARON CRAIG 9
AARON GOLD 9
AARON LAWSON 9
AARON LEVINSON 9
AARON NAZIR 9
.....
So the values are clearly coming from the map function, and the keys in the output haven't been combined either.
The full code is below:
//Task 3: List of people who visited at least once in both 2009 and 2010.
package org.myorg;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class Task3 {

    public static class Map
            extends Mapper<LongWritable, Text, Text, IntWritable> {

        protected void setup(Context context) throws IOException, InterruptedException {
        }

        public void map(LongWritable key,
                        Text value,
                        Context context) throws IOException {
            try {
                // Input is a comma-separated record: lineArray[0]/[1] hold the name,
                // lineArray[10]/[11] hold two date fields with the year at index 2
                // after splitting on space or '/'.
                String line = value.toString();
                String[] lineArray = line.split(",");
                String yearRaw;
                String name;
                name = lineArray[1] + " " + lineArray[0];
                yearRaw = lineArray[10];
                String[] year1Arr = yearRaw.split("[ /]");
                int y1, y2;
                if (year1Arr.length == 4 && year1Arr[2] != null) {
                    y1 = Integer.parseInt(year1Arr[2]);
                } else {
                    return;
                }
                String year2Raw = lineArray[11];
                String[] year2Arr = year2Raw.split("[ /]");
                if (year2Arr.length > 3 && year2Arr[2] != null) {
                    y2 = Integer.parseInt(year2Arr[2]);
                } else {
                    return;
                }
                // Emit 8 if either date falls in 2009, 9 if either falls in 2010, keyed by name.
                if ((y1 == 2009) || (y2 == 2009)) {
                    context.write(new Text(name), new IntWritable(8));
                }
                if ((y1 == 2010) || (y2 == 2010)) {
                    context.write(new Text(name), new IntWritable(9));
                }
            } catch (InterruptedException e) {
                System.out.println("Interrupted Exception");
            }
        }

        protected void cleanup(Context context) throws IOException, InterruptedException {
        }
    }
    public static class Reduce
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        protected void setup(Context context) throws IOException, InterruptedException {
        }

        public void reduce(Text key,
                           Iterator<IntWritable> values,
                           Context context) throws IOException {
            try {
                // Count how many 8 (2009) and 9 (2010) markers arrive for this key.
                int y1 = 0;
                int y2 = 0;
                IntWritable value;
                while (values.hasNext()) {
                    value = values.next();
                    if (value.get() == 8) {
                        y1 += 1;
                    } else if (value.get() == 9) {
                        y2 += 1;
                    }
                }
                // Write 5 when more than one marker was seen for the key, otherwise 17.
                if ((y1 + y2) > 1) {
                    context.write(key, new IntWritable(5));
                } else {
                    context.write(key, new IntWritable(17));
                }
            } catch (InterruptedException e) {
                System.out.println("Interrupted Exception");
            }
        }

        protected void cleanup(Context context) throws IOException, InterruptedException {
        }
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: task3 <in> <out>");
            System.exit(2);
        }

        // Creates a MapReduce job and links it to our class
        Job job = Job.getInstance(conf);
        job.setJarByClass(Task3.class);
        conf.set("mapreduce.output.textoutputformat.separator", ",");

        // Selects mapper/combiner/reducer
        job.setMapperClass(Map.class);
        //job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);

        // This says that (k1, v1) should be read from text files
        // and that (k3, v3) should be written to text files
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // The paths of these input/output are from application arguments
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        // Finally, run the job!
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
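For comparison, here is a minimal reducer sketch (not part of my job; the ReduceSketch name and the collapsed counting logic are only illustrative) whose reduce method matches the signature declared by org.apache.hadoop.mapreduce.Reducer in the new API, where the values arrive as an Iterable<IntWritable> and the method declares both IOException and InterruptedException:

    public static class ReduceSketch
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        @Override // fails to compile if this method does not override Reducer.reduce
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int markers = 0;
            // Count the 8/9 markers emitted by the mapper for this name.
            for (IntWritable value : values) {
                if (value.get() == 8 || value.get() == 9) {
                    markers += 1;
                }
            }
            // 5 when more than one marker was received, otherwise 17.
            context.write(key, new IntWritable(markers > 1 ? 5 : 17));
        }
    }

With @Override in place, the compiler refuses to build if the annotated method does not actually override the framework's reduce method.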