MapReduce error: map output is emitted instead of reduce output

Date: 2014-04-25 18:24:43

Tags: java hadoop mapreduce

What could cause MapReduce to emit the key/value pairs produced by my mapper instead of those produced by my reducer?

I am using Hadoop to run a MapReduce Java program. I have set up the mapper to output IntWritable values of 8 or 9, and the reducer to output values of 5 or 17.

I end up with the following results:

AAKASH LAKSHMANAN   9
AALIYAH HARRISON    9
AARON CARMACK   9
AARON CRAIG 9
AARON GOLD  9
AARON LAWSON    9
AARON LEVINSON  9
AARON NAZIR 9

.....

So the values clearly come from the map function. The keys in the output data are not merged either.

The full code is here:

//Task 3: List of people who visited at least once in both 2009 and 2010.

package org.myorg;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.*;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class Task3 {

public static class Map
       extends Mapper<LongWritable, Text, Text, IntWritable> {

    protected void setup(Context context) throws IOException, InterruptedException {
    }

    public void map(LongWritable key,
                    Text value,
                    Context context) throws IOException {
        try{

          String line = value.toString();

          String[] lineArray = line.split(",");
          String yearRaw;
          String name;
          name = lineArray[1] + " " + lineArray[0];
          yearRaw = lineArray[10];
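          // (Assumption: the date fields look like "MM/DD/YYYY hh:mm", so
          // splitting on spaces and slashes leaves the year at index 2.)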

          String[] year1Arr = yearRaw.split("[ /]");
          int y1, y2;

          if (year1Arr.length==4 && year1Arr[2] != null) {
             y1 = Integer.parseInt(year1Arr[2]);
          } else {
            return;
          }

          String year2Raw = lineArray[11];
          String[] year2Arr = year2Raw.split("[ /]");
          if (year2Arr.length > 3 && year2Arr[2] != null) {
             y2 = Integer.parseInt(year2Arr[2]);
          } else {
            return;
          }

          if ((y1 == 2009) || (y2 == 2009)) {
             context.write(new Text(name), new IntWritable(8));
          }

          if ((y1 == 2010) || (y2 == 2010)) {
             context.write(new Text(name), new IntWritable(9));
          }
        } catch(InterruptedException e) {
          System.out.println("Interrupted Exception");
        }
    }

    protected void cleanup(Context context) throws IOException, InterruptedException {
    }
}


public static class Reduce
              extends Reducer<Text, IntWritable, Text, IntWritable> {

    protected void setup(Context context) throws IOException, InterruptedException {
    }
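    // Note: the base class's reduce method takes an Iterable<IntWritable>,
    // not an Iterator<IntWritable>, so the method below has a different
    // signature and does not override Reducer.reduce.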
    public void reduce(Text key,
                       Iterator<IntWritable> values,
                       Context context) throws IOException {

        try {
          int y1 = 0;
          int y2 = 0;

          IntWritable value;

          while (values.hasNext()) {
              value = values.next();

              if (value.get() == 8) {
                 y1 += 1;
              } else if (value.get() == 9) {
                y2 += 1;
              }

          }

          if ((y1 + y2) > 1) {
             context.write(key, new IntWritable(5));
          } else {
            context.write(key, new IntWritable(17));
          }

        } catch (InterruptedException e) {
            System.out.println("Interrupted Exception");
        }
    }

    protected void cleanup(Context context) throws IOException, InterruptedException {
    }
}



public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: task3 <in> <out>");
        System.exit(2);
    }
    // Creates a MapReduce job and links it to our class
    Job job = Job.getInstance(conf);
    job.setJarByClass(Task3.class);
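    // Note: Job.getInstance(conf) copies the Configuration, so a property
    // set on conf after this point (like the separator below) is, as far
    // as I can tell, not seen by the job.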
    conf.set("mapreduce.output.textoutputformat.separator", ",");

    // Selects mapper/combiner/reducer
    job.setMapperClass(Map.class);
    //job.setCombinerClass(Reduce.class);
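    // (Note: reusing this Reduce as a combiner would be unsafe here anyway,
    // since it emits 5/17 while its input is expected to be 8/9.)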
    job.setReducerClass(Reduce.class);

    // This says that (k1, v1) should be read from text files
    // and that (k3, v3) should be written to text files
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // The paths of these input/output are from application arguments
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // Finally, run the job!
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

}
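For reference, here is a minimal sketch (the class name is my own, for illustration) of a reducer whose reduce method matches the new-API base signature. Note the Iterable parameter and the @Override annotation, which makes the compiler reject a method that does not actually override the base class:

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Illustrative sketch: the reduce signature matches the base class, so
// the framework will invoke it instead of the inherited default.
public class SketchReduce
        extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override  // will not compile if the signature does not match
    protected void reduce(Text key,
                          Iterable<IntWritable> values,
                          Context context) throws IOException, InterruptedException {
        int y1 = 0;  // records marked 8 (visited in 2009)
        int y2 = 0;  // records marked 9 (visited in 2010)

        for (IntWritable value : values) {
            if (value.get() == 8) {
                y1 += 1;
            } else if (value.get() == 9) {
                y2 += 1;
            }
        }

        context.write(key, new IntWritable((y1 + y2) > 1 ? 5 : 17));
    }
}

If the inherited default reduce runs instead, it writes each incoming (key, value) pair through unchanged, which would also explain why the keys are not merged in the output.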

0 answers