Stop word elimination in MapReduce (Java)

Asked: 2019-02-11 17:07:16

Tags: java hadoop mapreduce hdfs

I am writing a MapReduce program that eliminates stop words from the given input. The program runs, but it does not show any output. I am supposed to read the data from a file.

The console shows:

INFO mapreduce.Job: Running job: job_1549892265940_0006

Input:

 Hello this is abc. 
 Hello this is abc from xyz

Expected output:

Hello abc
Hello abc xyz
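
For reference, the per-line filtering I expect (match each token against the stop list case-insensitively and otherwise keep the word as written) can be sketched in plain Java, outside MapReduce. The class name, the tiny stop list, and the punctuation handling here are only for illustration:

import java.util.Arrays;
import java.util.List;
import java.util.StringJoiner;

// Illustrative sketch only: the per-line transformation expected above,
// with a stop list just big enough for the two sample lines.
public class StopWordFilterSketch {

    private static final List<String> STOP = Arrays.asList("this", "is", "from");

    static String filterLine(String line) {
        StringJoiner kept = new StringJoiner(" ");
        for (String token : line.split("\\s+")) {
            String word = token.replaceAll("[.,!?]+$", "");  // drop trailing punctuation
            if (!STOP.contains(word.toLowerCase())) {        // compare case-insensitively
                kept.add(word);                               // keep the original casing
            }
        }
        return kept.toString();
    }

    public static void main(String[] args) {
        System.out.println(filterLine("Hello this is abc."));         // Hello abc
        System.out.println(filterLine("Hello this is abc from xyz")); // Hello abc xyz
    }
}

My actual MapReduce code is below: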

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class StopWord {
    public static class Valuemap extends Mapper<LongWritable,Text,Text,IntWritable>
    {
        ArrayList<String> list = new ArrayList<>(Arrays.asList("a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "could", "did", "do", "does", "doing", "down", "during", "each", "few", "for",
                "from", "further", "had", "has", "have", "having", "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers", "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "it", "it's", "its", "itself",
                "let's", "me", "more", "most", "my", "myself", "nor", "of", "on", "once", "only", "or", "other", "ought", "our", "ours", 
                "ourselves", "out", "over", "own", "same", "she", "she'd", "she'll", "she's", "should", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs", "them", "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "through", "to", "too", "under", "until", "up", "very", "was", 
                "we", "we'd", "we'll", "we're", "we've", "were", "what", "what's", "when", "when's", "where", "where's", "which", "while", "who", "who's", "whom", 
                "why", "why's", "with", "would", "you", "you'd", "you'll", "you're", "you've",
                 "your", "yours", "yourself", "yourselves"));

        private static final IntWritable one = new IntWritable(1);   // marks a word to keep
        private static final IntWritable zero = new IntWritable(0);  // marks a stop word
        public void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException
        {
            //String[] words = value.toString().toLowerCase().split(" ");
            StringTokenizer tokens = new StringTokenizer(value.toString().toLowerCase());
            while(tokens.hasMoreTokens())
            {
                String string = tokens.nextToken();
                if (list.contains(string))
                {
                    context.write(new Text(string), zero);
                }
                else
                {
                    context.write(new Text(string), one);
                }
            }
        }
    }

    public class Valuereduce extends Reducer<Text, IntWritable, Text, NullWritable>
    {
        protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, NullWritable>.Context context) throws IOException, InterruptedException
        {
            int sum = 0;
            for (IntWritable value : values)
            {
                sum += value.get();
                if (sum != 0)
                    context.write(key, NullWritable.get());
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        @SuppressWarnings("deprecation")
        Job job = new Job(conf, "StopWordElimination");
        job.setJarByClass(StopWord.class);
        job.setMapperClass(Valuemap.class);
        job.setReducerClass(Valuereduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
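
A possible cause, stated as an assumption since no explicit error appears in the output above: Hadoop instantiates the Reducer through a no-argument constructor, and Valuereduce is declared here as a non-static inner class of StopWord, which the framework cannot construct that way. A sketch of the reducer as a static nested class (placed inside StopWord, using the same imports), summing first and writing each key only once, might look like this:

    // Sketch only, not a confirmed fix: static nested reducer that sums the
    // 0/1 flags from the mapper and emits each surviving word exactly once.
    public static class Valuereduce extends Reducer<Text, IntWritable, Text, NullWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();              // stop words contribute 0, kept words 1
            }
            if (sum != 0) {                      // at least one non-stop-word occurrence
                context.write(key, NullWritable.get());
            }
        }
    }

Even then, this job would emit one surviving (lower-cased) word per line, grouped and sorted by key, rather than reproducing the original sentences shown in the expected output.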

0 Answers