编写 MapReduce 程序时遇到问题

时间:2015-04-20 18:33:47

标签: java input mapreduce output bigdata

我正在为课程做一个项目，需要编写一个 MapReduce 程序来读取数据源并输出结果。我的数据来源是伊利诺伊州最受欢迎的男婴名字。我想输出数据中 25 个年份里每一年排名前 5 的名字。我目前的代码是基于一个 CSV 读取器示例写的。下面是我到目前为止的代码，如果有人能给我指出正确的方向，我将非常感激。

package nameReader;

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class nameJob {

    /**
     * Configures and launches the name-reader MapReduce job.
     *
     * <p>Usage: {@code nameJob <input path> <output path>}
     *
     * @param args args[0] = input path, args[1] = output path
     * @throws IOException if the job fails to submit or run
     */
    public static void main(String[] args) throws IOException {
        // Fail fast with a usage message instead of an
        // ArrayIndexOutOfBoundsException when paths are missing.
        if (args.length < 2) {
            System.err.println("Usage: nameJob <input path> <output path>");
            System.exit(2);
        }

        JobConf conf = new JobConf(nameJob.class);
        conf.setJobName("Name Reader");

        // Both the map output and the final job output are Text -> Text
        // (year -> name / year -> top-N summary), so key/value classes match.
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(Text.class);

        conf.setMapperClass(boyNameMapper.class);
        conf.setReducerClass(boyNameReducer.class);

        FileInputFormat.addInputPath(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        // Submits the job and blocks until it completes (old mapred API).
        JobClient.runJob(conf);
    }
}

Mapper

package nameReader;

import java.io.IOException;
import java.io.StringReader;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

import com.opencsv.CSVReader;

import java.util.regex.*;

public class boyNameMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, Text> {

    // Reusable output holders — standard Hadoop pattern to avoid
    // allocating a new Writable per input record.
    private final Text year = new Text();
    private final Text name = new Text();

    /**
     * Parses one CSV record (rank, year, name, frequency) and emits the
     * name keyed by year, once per occurrence, so the reducer can rank
     * names by frequency within each year.
     *
     * @throws IOException if the CSV line cannot be read
     */
    public void map(LongWritable key, Text value,
            OutputCollector<Text, Text> output,
            Reporter reporter) throws IOException {
        String line = value.toString();

        String[] fields;
        // try-with-resources guarantees the reader is closed even if
        // readNext() throws.
        try (CSVReader reader = new CSVReader(new StringReader(line))) {
            fields = reader.readNext();
        }

        // Implements the original TODO: skip blank or malformed rows
        // instead of crashing on a missing column.
        if (fields == null || fields.length < 4) {
            return;
        }

        int frequency;
        try {
            frequency = Integer.parseInt(fields[3].trim());
        } catch (NumberFormatException e) {
            // Non-numeric frequency — most likely the CSV header row;
            // silently skip it rather than failing the task.
            return;
        }

        year.set(fields[1].trim());
        name.set(fields[2].trim());

        // BUG FIX: OutputCollector.collect(K, V) takes exactly two
        // arguments — the original 4-argument call did not compile.
        // Keying by year groups all of a year's names in one reduce call.
        // Emitting the name `frequency` times lets the reducer's CountMap
        // recover the true frequency by counting occurrences.
        // NOTE(review): assumes mrtools.CountMap counts repeated values —
        // confirm; a combiner would cut shuffle volume here.
        for (int i = 0; i < frequency; i++) {
            output.collect(year, name);
        }
    }
}

Reducer

package nameReader;

import java.io.IOException;
import java.util.Iterator;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;

import mrtools.CountMap;
import mrtools.NBest;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class boyNameReducer extends MapReduceBase
    implements Reducer<Text, Text, Text, Text> {

    // BUG FIX: the goal is the top 5 names per year, but the original
    // kept only the 3 best entries.
    private final NBest<Text> best = new NBest<Text>(5);
    private final CountMap<Text> counts = new CountMap<Text>();

    /**
     * For one year (the key), counts how many times each name value was
     * emitted, selects the 5 most frequent, and writes them as a single
     * comma-separated line keyed by year.
     *
     * <p>The helper objects are instance fields reused across reduce()
     * calls (safe: one reducer task is single-threaded), so both must be
     * cleared before each key is processed.
     */
    public void reduce(Text key, Iterator<Text> values,
                    OutputCollector<Text, Text> output,
                    Reporter reporter) throws IOException {

        counts.clear();
        counts.countInto(values);

        best.clear();
        best.putMap(counts);

        output.collect(key, new Text(best.bestEntryCountTSV(",")));
    }
}

0 个答案:

没有答案