使用单独的映射器,减速器和驱动程序类运行MR程序

时间:2016-04-08 15:07:56

标签: java hadoop mapreduce

maxtempmapper.java类:

    package com.hadoop.gskCodeBase.maxTemp;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;


/**
 * Mapper that extracts (year, air temperature) pairs from fixed-width
 * NCDC weather records, skipping missing or low-quality readings.
 */
public class MaxTempMapper extends Mapper<LongWritable,Text,Text,IntWritable> {
    // Sentinel value the NCDC format uses for an unreadable temperature.
    private static final int MISSING = 9999;

    @Override
    public void map(LongWritable offset, Text record, Context context)
            throws IOException, InterruptedException {
        final String line = record.toString();
        final String year = line.substring(15, 19);

        // A leading '+' must be skipped explicitly; Integer.parseInt
        // already accepts a leading '-'.
        final int start = (line.charAt(87) == '+') ? 88 : 87;
        final int airTemperature = Integer.parseInt(line.substring(start, 92));

        // Single-character quality code; only codes 0,1,4,5,9 are trusted.
        final String quality = line.substring(92, 93);
        if (airTemperature != MISSING && quality.matches("[01459]")) {
            context.write(new Text(year), new IntWritable(airTemperature));
        }
    }

}

maxtempreducer.java类:

 package com.hadoop.gskCodeBase.maxTemp;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;



/**
 * Reducer that emits the maximum air temperature observed for each year.
 */
public class MaxTempReducer extends Reducer<Text,IntWritable,Text,IntWritable> {

    @Override
    public void reduce(Text year, Iterable<IntWritable> temperatures, Context context)
            throws IOException, InterruptedException {
        // Fold the readings for this year down to their maximum.
        int max = Integer.MIN_VALUE;
        for (IntWritable temperature : temperatures) {
            int current = temperature.get();
            if (current > max) {
                max = current;
            }
        }
        context.write(year, new IntWritable(max));
    }

}

maxtempdriver.java类:

    package com.hadoop.gskCodeBase.maxTemp;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;


public class MaxTempDriver extends Configured implements Tool{
    public int run(String[] args) throws Exception{
        if(args.length !=2){
            System.err.println("UsageTemperatureDriver <input path> <outputpath>");
            System.exit(-1);
        }

         Job job = Job.getInstance();
         job.setJarByClass(MaxTempDriver.class);
         job.setJobName("Max Temperature");

         FileInputFormat.addInputPath(job, new Path(args[0]));
         FileOutputFormat.setOutputPath(job,new Path(args[1]));

         job.setMapperClass(MaxTempMapper.class);
         job.setReducerClass(MaxTempReducer.class);

         job.setOutputKeyClass(Text.class);
         job.setOutputValueClass(IntWritable.class);

         System.exit(job.waitForCompletion(true) ? 0:1); 
         boolean success = job.waitForCompletion(true);
         return success ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
         MaxTempDriver driver = new MaxTempDriver();
         int exitCode = ToolRunner.run(driver, args);
         System.exit(exitCode);
         }


}

我必须使用命令提示符在Windows上的单节点hadoop集群上执行上述三个类 有人可以帮助我如何在命令提示符(Windows)上执行这三个类?

1 个答案:

答案 0(得分:0):

将所有java文件存档到单个.jar文件中。然后像往常一样运行它。在Windows中,通过Cygwin终端运行Hadoop更容易。您可以通过以下命令执行作业:

    hadoop jar <path to .jar> <path to input folder in hdfs> <path to output folder in hdfs>

例如:

    hadoop jar wordcount.jar /input /output

**更新**

  1. 您应该将您的驱动程序类分配给job.setJarByClass()。在这种情况下,它将是您的MaxTempDriver.class

  2. 在 Eclipse 中,您可以通过右键单击源文件夹 > 导出(Export)> JAR 文件来创建一个 jar 文件,然后按照向导的步骤操作。您也可以在此过程中设置主类。

  3. 希望这能回答你的问题。