如何在MapReduce中创建自定义Mapper类

时间:2019-02-26 23:59:55

标签: java mapreduce

我有一个特殊的需求:需要从文本文件中逐行读取zip shell命令,并且只使用mapper(不使用reducer)并行执行这些命令来创建zip文件。我打算在Java中通过exec来执行shell命令。由于我的输出将是压缩格式,我不太清楚应该如何实现自定义的Mapper。

下面是我的Mapper类:

import java.io.IOException;
import java.util.StringTokenizer;    
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper that splits each input line on whitespace and emits every token as an output key.
 *
 * <p>The output value is always the {@link NullWritable} singleton, so only the keys carry data.
 */
public class Map extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Emits one record per whitespace-delimited token of the given line.
     *
     * @param key     input key; not read by this method
     * @param value   the line of text to split; reused as the holder for each emitted token
     * @param context sink that receives each token paired with a null value
     * @throws IOException          if {@code context.write} throws it
     * @throws InterruptedException if {@code context.write} throws it
     */
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        final NullWritable nothing = NullWritable.get();
        final StringTokenizer words = new StringTokenizer(value.toString());

        // countTokens() reports how many nextToken() calls remain, so this visits every token once.
        for (int remaining = words.countTokens(); remaining > 0; remaining--) {
            value.set(words.nextToken());
            context.write(value, nothing);
        }
    }

}

驱动类(ZipProcessor):

import org.apache.hadoop.util.ToolRunner;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;

/**
 * Command-line driver that configures and submits the "zipping" MapReduce job.
 *
 * <p>Expects exactly two arguments: the input path and the output path.
 */
public class ZipProcessor extends Configured implements Tool {

    /**
     * Runs this tool through {@link ToolRunner} and exits the JVM with the code it returns.
     *
     * @param args command-line arguments passed on to {@code ToolRunner.run}
     * @throws Exception if {@code ToolRunner.run} throws
     */
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new ZipProcessor(), args);
        System.exit(exitCode);
    }

    /**
     * Builds the job, waits for it to finish, and reports the outcome on standard output.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @return {@code 0} if the job succeeded, {@code 1} if it failed,
     *         {@code -1} if the argument count is not two
     * @throws Exception if job setup or execution throws
     */
    public int run(String[] args) throws Exception {

        if (args.length != 2) {
            System.err.printf("Usage: %s needs two arguments, input and output files\n", getClass().getSimpleName());
            return -1;
        }

        // Use the configuration injected into this tool instead of a brand-new one, so that
        // options the launcher already applied to it are not thrown away. Fall back to a
        // fresh Configuration when run() is invoked without one having been set.
        Configuration conf = getConf();
        if (conf == null) {
            conf = new Configuration();
        }

        Job job = Job.getInstance(conf, "zipping");
        job.setJarByClass(ZipProcessor.class);
        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        // NOTE(review): no reducer class or reducer count is configured here, so the framework
        // defaults apply. If a map-only job is intended, job.setNumReduceTasks(0) is likely
        // needed -- confirm against the requirement.

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // waitForCompletion already reports success or failure; reuse that single result
        // for both the console message and the exit code.
        boolean succeeded = job.waitForCompletion(true);
        System.out.println(succeeded ? "Job was successful" : "Job was not successful");

        return succeeded ? 0 : 1;
    }

}

mapr.txt样本

zip -r "/folder1/file.zip" "sourceFolder"
zip -r "/folder2/file.zip" "sourceFolder"
zip -r "/folder3/file.zip" "sourceFolder"

0 个答案:

没有答案