标签: java hadoop mapreduce

我不是 Hadoop 专家，遇到了以下问题。我有一个作业必须在 Hadoop 0.20.2 版本的集群上运行。启动作业时，我会指定一些参数，并希望把其中的两个参数传递给 Mapper 和 Reducer 类，供它们在需要时使用。


    package bigdata;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.TreeMap;

import org.apache.commons.math3.stat.regression.SimpleRegression;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import com.vividsolutions.jts.geom.Geometry;
import com.vividsolutions.jts.io.ParseException;
import com.vividsolutions.jts.io.WKTReader;

public class BoxCount extends Configured implements Tool{

    private static String mbr;
    private static double cs;

    public static class Map extends Mapper<LongWritable, Text, IntWritable, Text> implements JobConfigurable 
        public void configure(JobConf job) {
            mbr = job.get(mbr);
            cs = job.getDouble("cellSide", 0.1);

        protected void setup(Context context) 
                throws IOException, InterruptedException {
            // metodo in cui leggere l'MBR passato come parametro
            System.out.println("mbr: " + mbr + "\ncs: " + cs);
            // ...

        public void map(LongWritable key, Text value, Context context) 
                throws IOException, InterruptedException {
            // some code here

        protected void cleanup(Context context) throws IOException, InterruptedException 
            // other code


  public static class Reduce extends Reducer<IntWritable,Text,IntWritable,IntWritable>implements JobConfigurable  
      private static String mbr;
      private static double cs;

      public void configure(JobConf job) {
          mbr = job.get(mbr);
          cs = job.getDouble("cellSide", 0.1);

      protected void setup(Context context) throws IOException, InterruptedException 
          System.out.println("mbr: " + mbr + " cs: " + cs);


      public void reduce(IntWritable key, Iterable<Text> values, Context context) 
              throws IOException, InterruptedException {
          //the reduce code

    protected void cleanup(Context context)
                throws IOException, InterruptedException {

          // cleanup code   


  /**
   * Builds the driver from the command-line arguments
   * {@code <mbr> <cell_Side> <input_path> <output_path>}.
   *
   * The mbr argument (args[0]) is currently not used: the bounding box is
   * hard-coded below. The cell side is reduced when necessary so that
   * {@code cellSide * 2^minNumGriglie} does not exceed the grid width.
   *
   * @param args command-line arguments; at least four are required
   * @throws IllegalArgumentException if fewer than four arguments are given
   * @throws NumberFormatException if args[1] is not a parsable double
   */
  public BoxCount (String[] args) {
      if (args.length != 4) {
          //                                             0                                  1           2               3
          System.out.println("Usage: OneGrid <mbr (Rectangle: (xmin,ymin)-(xmax,ymax))> <cell_Side> <input_path> <output_path>");
          System.out.println("args.length = "+args.length);
          for (int i = 0; i < args.length; i++) {
              System.out.println("args["+i+"]"+" = "+args[i]);
          }
      }
      // With fewer than four arguments the reads of args[1]..args[3] below would
      // fail with ArrayIndexOutOfBoundsException; fail with a clear message instead.
      if (args.length < 4) {
          throw new IllegalArgumentException(
                  "Expected 4 arguments but got " + args.length);
      }

      this.numReducers = 1;
      //this.mbr = new String(args[0]);
      // this.mbr = "Rectangle: (0.01,0.01)-(99.99,99.99)";
      // for sierpinski_jts
      this.mbr = "Rectangle: (0.0,0.0)-(100.01,86.6125)";
      // for the diagonal case
      //this.mbr = "Rectangle: (1.5104351688932738,1.0787616413335854)-(99999.3453727045,99999.98043392139)";
      // for the uniform case
      // this.mbr = "Rectangle: (0.3020720559407146,0.2163091760095974)-(99999.68881210628,99999.46079314972)";

      this.cellSide = Double.parseDouble(args[1]);
      this.inputPath = new Path(args[2]);
      this.outputDir = new Path(args[3]);

      // Recompute the cell size so that at least
      // minNumGriglie (10) grids are obtained.
      Grid g = new Grid(mbr, cellSide);
      if ((this.cellSide*(Math.pow(2,minNumGriglie))) > g.width) {
          this.cellSide = g.width/(Math.pow(2,minNumGriglie));
      }
  }

  public static void main(String[] args) throws Exception {
      int res = ToolRunner.run(new Configuration(), new BoxCount(args), args);

  public int run(String[] args) throws Exception 
      // define new job instead of null using conf
      Configuration conf = getConf();
      Job job = new Job(conf, "BoxCount");
      // conf.set("mapreduce.framework.name", "local");
      // conf.set("mapreduce.jobtracker.address", "local");
      // conf.set("fs.defaultFS","file:///");

      // passo il valore mbr per creare la griglia
      conf.set("mbr", mbr);

      // passo lato cella 
      conf.setDouble("cellSide", cellSide); 

      // set job input format

      // set map class and the map output key and value classes

      // set reduce class and the reduce output key and value classes

      // set job output format

      // add the input file as job input (from HDFS) to the variable
      // inputFile
      TextInputFormat.setInputPaths(job, inputPath);

      // set the output path for the job results (to HDFS) to the variable
      // outputPath
      TextOutputFormat.setOutputPath(job, outputDir);

      // set the number of reducers using variable numberReducers

      // set the jar class

      return job.waitForCompletion(true) ? 0 : 1; // this will execute the job



0 个答案:
